# Mirror of https://github.com/opendatalab/MinerU.git
# Synced 2026-03-27 11:08:32 +07:00
# (89 lines, 3.3 KiB, Python)
import os
import sys

from vllm.entrypoints.cli.main import main as vllm_main

from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors
from mineru.utils.config_reader import get_device
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
def _flag_present(arg, flag):
    """Return True if *arg* is *flag* itself or a ``--flag=value`` form of it."""
    return arg == flag or arg.startswith(flag + "=")


def main():
    """Launch a vLLM server configured for the MinerU VLM backend.

    Reads CLI arguments from ``sys.argv``, injects MinerU defaults for any
    option the caller did not supply (port, GPU memory utilization, model
    path, logits processors, device-specific compilation config), rewrites
    ``sys.argv`` into a ``vllm serve <model_path> ...`` command line, and
    hands control to vLLM's CLI entry point.
    """
    args = sys.argv[1:]

    # Which options the caller already provided; we only add missing ones.
    has_port_arg = False
    has_gpu_memory_utilization_arg = False
    has_logits_processors_arg = False
    has_block_size_arg = False  # only consumed by the disabled musa section below
    has_compilation_config = False
    model_path = None
    model_arg_indices = []

    # Scan the existing arguments.
    for i, arg in enumerate(args):
        if _flag_present(arg, "--port"):
            has_port_arg = True
        if _flag_present(arg, "--gpu-memory-utilization"):
            has_gpu_memory_utilization_arg = True
        if _flag_present(arg, "--logits-processors"):
            has_logits_processors_arg = True
        if _flag_present(arg, "--block-size"):
            has_block_size_arg = True
        if _flag_present(arg, "--compilation-config"):
            has_compilation_config = True
        if arg == "--model":
            if i + 1 < len(args):
                model_path = args[i + 1]
                model_arg_indices.extend([i, i + 1])
        elif arg.startswith("--model="):
            model_path = arg.split("=", 1)[1]
            model_arg_indices.append(i)

    # Remove the --model arguments from the list; the model path is re-added
    # later as the positional argument of "vllm serve".  Pop from the back so
    # earlier indices stay valid.
    if model_arg_indices:
        for index in sorted(model_arg_indices, reverse=True):
            args.pop(index)

    custom_logits_processors = enable_custom_logits_processors()

    # Add default arguments for anything the caller omitted.
    if not has_port_arg:
        args.extend(["--port", "30000"])
    if not has_gpu_memory_utilization_arg:
        gpu_memory_utilization = str(set_default_gpu_memory_utilization())
        args.extend(["--gpu-memory-utilization", gpu_memory_utilization])
    if not model_path:
        model_path = auto_download_and_get_model_root_path("/", "vlm")
    if (not has_logits_processors_arg) and custom_logits_processors:
        args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])

    # Special configuration for the musa vllm v1 engine (currently disabled).
    # device = get_device()
    # if device.startswith("musa"):
    #     import torch
    #     if torch.musa.is_available():
    #         if not has_block_size_arg:
    #             args.extend(["--block-size", "32"])
    #         if not has_compilation_config:
    #             args.extend(["--compilation-config", '{"cudagraph_capture_sizes": [1,2,3,4,5,6,7,8,10,12,14,16,18,20,24,28,30], "simple_cuda_graph": true}'])

    # Special configuration for the corex vllm v1 engine.
    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
    if device_type.lower() == "corex":
        if not has_compilation_config:
            args.extend(["--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0}'])

    # Rebuild argv: model path becomes the positional argument of "serve".
    sys.argv = [sys.argv[0]] + ["serve", model_path] + args

    if os.getenv('OMP_NUM_THREADS') is None:
        os.environ["OMP_NUM_THREADS"] = "1"

    # Start the vllm server.
    print(f"start vllm server: {sys.argv}")
    vllm_main()
|
|
|
|
|
|
# Script entry point: delegate to main() when executed directly.
if __name__ == "__main__":
    main()
|