From 73b31d1118281062513fd66d88d5c391f9225355 Mon Sep 17 00:00:00 2001 From: myhloli Date: Thu, 5 Feb 2026 14:25:43 +0800 Subject: [PATCH] feat: add Kunlunxin platform documentation and Dockerfile for vLLM support --- docker/china/kxpu.Dockerfile | 33 +++++ .../usage/acceleration_cards/IluvatarCorex.md | 2 +- docs/zh/usage/acceleration_cards/Kunlunxin.md | 126 ++++++++++++++++++ .../usage/acceleration_cards/MooreThreads.md | 1 + mineru/backend/vlm/utils.py | 118 +++++++++++++++- mineru/backend/vlm/vlm_analyze.py | 47 +------ mineru/model/vlm/vllm_server.py | 26 +--- 7 files changed, 284 insertions(+), 69 deletions(-) create mode 100644 docker/china/kxpu.Dockerfile create mode 100644 docs/zh/usage/acceleration_cards/Kunlunxin.md diff --git a/docker/china/kxpu.Dockerfile b/docker/china/kxpu.Dockerfile new file mode 100644 index 00000000..91605ee4 --- /dev/null +++ b/docker/china/kxpu.Dockerfile @@ -0,0 +1,33 @@ +# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + Kunlun XPU. 
+FROM docker.1ms.run/wjie520/vllm_kunlun:v0.10.1.1rc1 + + +# Install Noto fonts for Chinese characters +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install mineru latest +RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install "mineru[api,gradio]>=2.7.6" \ + "matplotlib>=3.10,<4" \ + "ultralytics>=8.3.48,<9" \ + "doclayout_yolo==0.0.4" \ + "ftfy>=6.3.1,<7" \ + "shapely>=2.0.7,<3" \ + "pyclipper>=1.3.0,<2" \ + "omegaconf>=2.3.0,<3" \ + -i https://mirrors.aliyun.com/pypi/simple && \ + sed -i '1,200{s/self\.act = act_layer()/self.act = nn.GELU()/;t;b};' /root/miniconda/envs/vllm_kunlun_0.10.1.1/lib/python3.10/site-packages/vllm_kunlun/models/qwen2_vl.py && \ + python3 -m pip cache purge + +# Download models and update the configuration file +RUN /bin/bash -c "mineru-models-download -s modelscope -m all" + +# Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the given command +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file diff --git a/docs/zh/usage/acceleration_cards/IluvatarCorex.md b/docs/zh/usage/acceleration_cards/IluvatarCorex.md index 9c550a87..e6343cac 100644 --- a/docs/zh/usage/acceleration_cards/IluvatarCorex.md +++ b/docs/zh/usage/acceleration_cards/IluvatarCorex.md @@ -36,7 +36,7 @@ docker run --name mineru_docker \ --security-opt apparmor=unconfined \ -e VLLM_ENFORCE_CUDA_GRAPH=1 \ -e MINERU_MODEL_SOURCE=local \ - -e MINERU_LMDEPLOY_DEVICE=corex \ + -e MINERU_VLLM_DEVICE=corex \ -it mineru:corex-vllm-latest \ /bin/bash ``` diff --git a/docs/zh/usage/acceleration_cards/Kunlunxin.md b/docs/zh/usage/acceleration_cards/Kunlunxin.md new file mode 100644 index 00000000..4bc19993 --- /dev/null +++ b/docs/zh/usage/acceleration_cards/Kunlunxin.md @@ -0,0 +1,126 @@ +## 1. 
测试平台 +以下为本指南测试使用的平台信息,供参考: +``` +os: Ubuntu 22.04.5 LTS +cpu: Intel x86-64 +xpu: P800 +driver: 515.58 +docker: 20.10.5 +``` + +## 2. 环境准备 + +### 2.1 使用 Dockerfile 构建镜像 (vllm) + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/kxpu.Dockerfile +docker build --network=host -t mineru:kxpu-vllm-latest -f kxpu.Dockerfile . +``` + +## 3. 启动 Docker 容器 + +```bash +docker run -u root --name mineru_docker \ + --device=/dev/xpu0:/dev/xpu0 \ + --device=/dev/xpu1:/dev/xpu1 \ + --device=/dev/xpu2:/dev/xpu2 \ + --device=/dev/xpu3:/dev/xpu3 \ + --device=/dev/xpu4:/dev/xpu4 \ + --device=/dev/xpu5:/dev/xpu5 \ + --device=/dev/xpu6:/dev/xpu6 \ + --device=/dev/xpu7:/dev/xpu7 \ + --device=/dev/xpuctrl:/dev/xpuctrl \ + --net=host \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + --tmpfs /dev/shm:rw,nosuid,nodev,exec,size=32g \ + --cap-add=SYS_PTRACE \ + -v /home/users/vllm-kunlun:/home/vllm-kunlun \ + -v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \ + -w /workspace \ + -e MINERU_MODEL_SOURCE=local \ + -e MINERU_VLLM_DEVICE=kxpu \ + -it mineru:kxpu-vllm-latest \ + /bin/bash +``` + +执行该命令后,您将进入到Docker容器的交互式终端,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。 +您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。 + + +## 4. 注意事项 + +不同环境下,MinerU对Kunlunxin加速卡的支持情况如下表所示: + +>[!TIP] +> - `vllm`黄灯问题为不支持`hybrid-auto-engine`模式,`vlm-auto-engine`不受影响。 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
使用场景容器环境
vllm
命令行工具(mineru)pipeline🟢
<vlm/hybrid>-auto-engine🟡
<vlm/hybrid>-http-client🟢
fastapi服务(mineru-api)pipeline🟢
<vlm/hybrid>-auto-engine🟡
<vlm/hybrid>-http-client🟢
gradio界面(mineru-gradio)pipeline🟢
<vlm/hybrid>-auto-engine🟡
<vlm/hybrid>-http-client🟢
openai-server服务(mineru-openai-server)🟢
数据并行 (--data-parallel-size)🔴
+ +注: +🟢: 支持,运行较稳定,精度与Nvidia GPU基本一致 +🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异 +🔴: 不支持,无法运行,或精度存在较大差异 + +>[!TIP] +> - Kunlunxin加速卡指定可用加速卡的方式与NVIDIA GPU类似,请参考[使用指定GPU设备](https://opendatalab.github.io/MinerU/zh/usage/advanced_cli_parameters/#cuda_visible_devices)章节说明, +>将环境变量`CUDA_VISIBLE_DEVICES`替换为`XPU_VISIBLE_DEVICES`即可。 +> - 在Kunlunxin平台可以通过`xpu-smi`命令查看加速卡的使用情况,并根据需要指定空闲的加速卡ID以避免资源冲突。 \ No newline at end of file diff --git a/docs/zh/usage/acceleration_cards/MooreThreads.md b/docs/zh/usage/acceleration_cards/MooreThreads.md index 981fc26c..16d88229 100644 --- a/docs/zh/usage/acceleration_cards/MooreThreads.md +++ b/docs/zh/usage/acceleration_cards/MooreThreads.md @@ -27,6 +27,7 @@ docker run -u root --name mineru_docker \ --shm-size=80g \ --privileged \ -e MTHREADS_VISIBLE_DEVICES=all \ + -e MINERU_VLLM_DEVICE=musa \ -e MINERU_MODEL_SOURCE=local \ -it mineru:musa-vllm-latest \ /bin/bash diff --git a/mineru/backend/vlm/utils.py b/mineru/backend/vlm/utils.py index 24b04b90..6ebc3d09 100644 --- a/mineru/backend/vlm/utils.py +++ b/mineru/backend/vlm/utils.py @@ -102,4 +102,120 @@ def set_default_batch_size() -> int: except Exception as e: logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1') batch_size = 1 - return batch_size \ No newline at end of file + return batch_size + + +def _get_device_config(device_type: str) -> dict | None: + """获取不同设备类型的配置参数""" + + # 各设备类型的配置定义 + DEVICE_CONFIGS = { + # "musa": { + # "compilation_config_dict": { + # "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30], + # "simple_cuda_graph": True + # }, + # "block_size": 32, + # }, + "corex": { + "compilation_config_dict": { + "cudagraph_mode": "FULL_DECODE_ONLY", + "level": 0 + }, + }, + "kxpu": { + "compilation_config_dict": { + "splitting_ops": [ + "vllm.unified_attention", "vllm.unified_attention_with_output", + "vllm.unified_attention_with_output_kunlun", "vllm.mamba_mixer2", + "vllm.mamba_mixer", "vllm.short_conv", 
"vllm.linear_attention", + "vllm.plamo2_mamba_mixer", "vllm.gdn_attention", "vllm.sparse_attn_indexer" + ] + }, + "block_size": 128, + "dtype": "float16", + "distributed_executor_backend": "mp", + }, + } + + return DEVICE_CONFIGS.get(device_type.lower()) + + +def _check_server_arg_exists(args: list, arg_name: str) -> bool: + """检查命令行参数列表中是否已存在指定参数""" + return any(arg == f"--{arg_name}" or arg.startswith(f"--{arg_name}=") for arg in args) + + +def _add_server_arg_if_missing(args: list, arg_name: str, value: str) -> None: + """如果参数不存在,则添加到命令行参数列表""" + if not _check_server_arg_exists(args, arg_name): + args.extend([f"--{arg_name}", value]) + + +def _add_engine_kwarg_if_missing(kwargs: dict, key: str, value) -> None: + """如果参数不存在,则添加到 kwargs 字典""" + if key not in kwargs: + kwargs[key] = value + + +def mod_kwargs_by_device_type(kwargs_or_args: dict | list, vllm_mode: str) -> dict | list: + """根据设备类型修改 vllm 配置参数 + + Args: + kwargs_or_args: 配置参数,server 模式为 list,engine 模式为 dict + vllm_mode: vllm 运行模式 ("server", "sync_engine", "async_engine") + + Returns: + 修改后的配置参数 + """ + device_type = os.getenv("MINERU_VLLM_DEVICE", "") + config = _get_device_config(device_type) + + if config is None: + return kwargs_or_args + + if vllm_mode == "server": + _apply_server_config(kwargs_or_args, config) + else: + _apply_engine_config(kwargs_or_args, config, vllm_mode) + + return kwargs_or_args + + +def _apply_server_config(args: list, config: dict) -> None: + """应用 server 模式的配置""" + import json + + if "compilation_config_dict" in config: + _add_server_arg_if_missing( + args, "compilation-config", + json.dumps(config["compilation_config_dict"], separators=(',', ':')) + ) + + for key in ["block_size", "dtype", "distributed_executor_backend"]: + if key in config: + # 转换 key 格式: block_size -> block-size + arg_name = key.replace("_", "-") + _add_server_arg_if_missing(args, arg_name, str(config[key])) + + +def _apply_engine_config(kwargs: dict, config: dict, vllm_mode: str) -> None: + """应用 
engine 模式的配置""" + try: + from vllm.config import CompilationConfig + except ImportError: + raise ImportError("Please install vllm to use the vllm-async-engine backend.") + + if "compilation_config_dict" in config: + config_dict = config["compilation_config_dict"] + if vllm_mode == "sync_engine": + compilation_config = config_dict + elif vllm_mode == "async_engine": + compilation_config = CompilationConfig(**config_dict) + else: + return + _add_engine_kwarg_if_missing(kwargs, "compilation_config", compilation_config) + + for key in ["block_size", "dtype", "distributed_executor_backend"]: + if key in config: + _add_engine_kwarg_if_missing(kwargs, key, config[key]) diff --git a/mineru/backend/vlm/vlm_analyze.py b/mineru/backend/vlm/vlm_analyze.py index 58884d3f..e66658ee 100644 --- a/mineru/backend/vlm/vlm_analyze.py +++ b/mineru/backend/vlm/vlm_analyze.py @@ -6,7 +6,7 @@ import json from loguru import logger from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \ - set_lmdeploy_backend + set_lmdeploy_backend, mod_kwargs_by_device_type from .model_output_to_middle_json import result_to_middle_json from ...data.data_reader_writer import DataWriter from mineru.utils.pdf_image_tools import load_images_from_pdf @@ -101,27 +101,7 @@ class ModelSingleton: except ImportError: raise ImportError("Please install vllm to use the vllm-engine backend.") - # musa vllm v1 引擎特殊配置 - # device = get_device() - # if device_type.startswith("musa"): - # import torch - # if torch.musa.is_available(): - # compilation_config = { - # "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30], - # "simple_cuda_graph": True - # } - # block_size = 32 - # kwargs["compilation_config"] = compilation_config - # kwargs["block_size"] = block_size - - # corex vllm v1 引擎特殊配置 - device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "") - if device_type.lower() == "corex": - compilation_config = { - "cudagraph_mode": 
"FULL_DECODE_ONLY", - "level": 0 - } - kwargs["compilation_config"] = compilation_config + kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="sync_engine") if "compilation_config" in kwargs: if isinstance(kwargs["compilation_config"], str): @@ -148,28 +128,7 @@ class ModelSingleton: except ImportError: raise ImportError("Please install vllm to use the vllm-async-engine backend.") - - # musa vllm v1 引擎特殊配置 - # device = get_device() - # if device.startswith("musa"): - # import torch - # if torch.musa.is_available(): - # compilation_config = CompilationConfig( - # cudagraph_capture_sizes=[1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30], - # simple_cuda_graph=True - # ) - # block_size = 32 - # kwargs["compilation_config"] = compilation_config - # kwargs["block_size"] = block_size - - # corex vllm v1 引擎特殊配置 - device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "") - if device_type.lower() == "corex": - compilation_config = CompilationConfig( - cudagraph_mode="FULL_DECODE_ONLY", - level=0 - ) - kwargs["compilation_config"] = compilation_config + kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="async_engine") if "compilation_config" in kwargs: if isinstance(kwargs["compilation_config"], dict): diff --git a/mineru/model/vlm/vllm_server.py b/mineru/model/vlm/vllm_server.py index eb2c3286..8d438f1d 100644 --- a/mineru/model/vlm/vllm_server.py +++ b/mineru/model/vlm/vllm_server.py @@ -1,8 +1,8 @@ import os import sys -from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors -from mineru.utils.config_reader import get_device +from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors, \ + mod_kwargs_by_device_type from mineru.utils.models_download_utils import auto_download_and_get_model_root_path from vllm.entrypoints.cli.main import main as vllm_main @@ -14,8 +14,6 @@ def main(): has_port_arg = False has_gpu_memory_utilization_arg = False has_logits_processors_arg 
= False - has_block_size_arg = False - has_compilation_config = False model_path = None model_arg_indices = [] @@ -27,10 +25,6 @@ def main(): has_gpu_memory_utilization_arg = True if arg == "--logits-processors" or arg.startswith("--logits-processors="): has_logits_processors_arg = True - if arg == "--block-size" or arg.startswith("--block-size="): - has_block_size_arg = True - if arg == "--compilation-config" or arg.startswith("--compilation-config="): - has_compilation_config = True if arg == "--model": if i + 1 < len(args): model_path = args[i + 1] @@ -57,21 +51,7 @@ def main(): if (not has_logits_processors_arg) and custom_logits_processors: args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"]) - # musa vllm v1 引擎特殊配置 - # device = get_device() - # if device.startswith("musa"): - # import torch - # if torch.musa.is_available(): - # if not has_block_size_arg: - # args.extend(["--block-size", "32"]) - # if not has_compilation_config: - # args.extend(["--compilation-config", '{"cudagraph_capture_sizes": [1,2,3,4,5,6,7,8,10,12,14,16,18,20,24,28,30], "simple_cuda_graph": true}']) - - # corex vllm v1 引擎特殊配置 - device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "") - if device_type.lower() == "corex": - if not has_compilation_config: - args.extend(["--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0}']) + args = mod_kwargs_by_device_type(args, vllm_mode="server") # 重构参数,将模型路径作为位置参数 sys.argv = [sys.argv[0]] + ["serve", model_path] + args