From 73b31d1118281062513fd66d88d5c391f9225355 Mon Sep 17 00:00:00 2001
From: myhloli <moe@myhloli.com>
Date: Thu, 5 Feb 2026 14:25:43 +0800
Subject: [PATCH] feat: add Kunlunxin platform documentation and Dockerfile for
 vLLM support

---
 docker/china/kxpu.Dockerfile                  |  33 +++++
 .../usage/acceleration_cards/IluvatarCorex.md |   2 +-
 docs/zh/usage/acceleration_cards/Kunlunxin.md | 126 ++++++++++++++++++
 .../usage/acceleration_cards/MooreThreads.md  |   1 +
 mineru/backend/vlm/utils.py                   | 118 +++++++++++++++-
 mineru/backend/vlm/vlm_analyze.py             |  47 +------
 mineru/model/vlm/vllm_server.py               |  26 +---
 7 files changed, 284 insertions(+), 69 deletions(-)
 create mode 100644 docker/china/kxpu.Dockerfile
 create mode 100644 docs/zh/usage/acceleration_cards/Kunlunxin.md

diff --git a/docker/china/kxpu.Dockerfile b/docker/china/kxpu.Dockerfile
new file mode 100644
index 00000000..91605ee4
--- /dev/null
+++ b/docker/china/kxpu.Dockerfile
@@ -0,0 +1,33 @@
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + Kunlun XPU.
+FROM docker.1ms.run/wjie520/vllm_kunlun:v0.10.1.1rc1
+
+
+# Install Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install mineru (>=2.7.6) with pinned extras, then patch vllm_kunlun's qwen2_vl.py so that self.act = nn.GELU()
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install "mineru[api,gradio]>=2.7.6" \
+                            "matplotlib>=3.10,<4" \
+                            "ultralytics>=8.3.48,<9" \
+                            "doclayout_yolo==0.0.4" \
+                            "ftfy>=6.3.1,<7" \
+                            "shapely>=2.0.7,<3" \
+                            "pyclipper>=1.3.0,<2" \
+                            "omegaconf>=2.3.0,<3" \
+                            -i https://mirrors.aliyun.com/pypi/simple && \
+    sed -i '1,200{s/self\.act = act_layer()/self.act = nn.GELU()/;t;b};' /root/miniconda/envs/vllm_kunlun_0.10.1.1/lib/python3.10/site-packages/vllm_kunlun/models/qwen2_vl.py && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
\ No newline at end of file
diff --git a/docs/zh/usage/acceleration_cards/IluvatarCorex.md b/docs/zh/usage/acceleration_cards/IluvatarCorex.md
index 9c550a87..e6343cac 100644
--- a/docs/zh/usage/acceleration_cards/IluvatarCorex.md
+++ b/docs/zh/usage/acceleration_cards/IluvatarCorex.md
@@ -36,7 +36,7 @@ docker run --name mineru_docker \
    --security-opt apparmor=unconfined \
    -e VLLM_ENFORCE_CUDA_GRAPH=1 \
    -e MINERU_MODEL_SOURCE=local \
-   -e MINERU_LMDEPLOY_DEVICE=corex \
+   -e MINERU_VLLM_DEVICE=corex \
    -it mineru:corex-vllm-latest \
    /bin/bash
 ```
diff --git a/docs/zh/usage/acceleration_cards/Kunlunxin.md b/docs/zh/usage/acceleration_cards/Kunlunxin.md
new file mode 100644
index 00000000..4bc19993
--- /dev/null
+++ b/docs/zh/usage/acceleration_cards/Kunlunxin.md
@@ -0,0 +1,126 @@
+## 1. 测试平台
+以下为本指南测试使用的平台信息，供参考：
+```
+os: Ubuntu 22.04.5 LTS  
+cpu: Intel x86-64
+xpu: P800
+driver: 515.58
+docker: 20.10.5
+```
+
+## 2. 环境准备
+
+### 2.1 使用 Dockerfile 构建镜像 （vllm）
+
+```bash
+wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/kxpu.Dockerfile
+docker build --network=host -t mineru:kxpu-vllm-latest -f kxpu.Dockerfile .
+```
+
+## 3. 启动 Docker 容器
+
+```bash
+docker run -u root --name mineru_docker \
+    --device=/dev/xpu0:/dev/xpu0 \
+    --device=/dev/xpu1:/dev/xpu1 \
+    --device=/dev/xpu2:/dev/xpu2 \
+    --device=/dev/xpu3:/dev/xpu3 \
+    --device=/dev/xpu4:/dev/xpu4 \
+    --device=/dev/xpu5:/dev/xpu5 \
+    --device=/dev/xpu6:/dev/xpu6 \
+    --device=/dev/xpu7:/dev/xpu7 \
+    --device=/dev/xpuctrl:/dev/xpuctrl \
+    --net=host \
+    --cap-add=SYS_PTRACE \
+    --security-opt seccomp=unconfined \
+    --tmpfs /dev/shm:rw,nosuid,nodev,exec,size=32g \
+    -v /home/users/vllm-kunlun:/home/vllm-kunlun \
+    -v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \
+    -w /workspace \
+    -e MINERU_MODEL_SOURCE=local \
+    -e MINERU_VLLM_DEVICE=kxpu \
+    -it mineru:kxpu-vllm-latest \
+    /bin/bash
+```
+
+执行该命令后，您将进入到Docker容器的交互式终端，您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。
+您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务，详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。
+
+
+## 4. 注意事项
+
+不同环境下，MinerU对Kunlunxin加速卡的支持情况如下表所示：
+
+>[!TIP]
+> - `vllm`黄灯问题为不支持`hybrid-auto-engine`模式，`vlm-auto-engine`不受影响。
+
+<table border="1">
+  <thead>
+    <tr>
+      <th rowspan="2" colspan="2">使用场景</th>
+      <th>容器环境</th>
+    </tr>
+    <tr>
+      <th>vllm</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td rowspan="3">命令行工具(mineru)</td>
+      <td>pipeline</td>
+      <td>🟢</td>
+    </tr>
+    <tr>
+      <td>&lt;vlm/hybrid&gt;-auto-engine</td>
+      <td>🟡</td>
+    </tr>
+    <tr>
+      <td>&lt;vlm/hybrid&gt;-http-client</td>
+      <td>🟢</td>
+    </tr>
+    <tr>
+      <td rowspan="3">fastapi服务(mineru-api)</td>
+      <td>pipeline</td>
+      <td>🟢</td>
+    </tr>
+    <tr>
+      <td>&lt;vlm/hybrid&gt;-auto-engine</td>
+      <td>🟡</td>
+    </tr>
+    <tr>
+      <td>&lt;vlm/hybrid&gt;-http-client</td>
+      <td>🟢</td>
+    </tr>
+    <tr>
+      <td rowspan="3">gradio界面(mineru-gradio)</td>
+      <td>pipeline</td>
+      <td>🟢</td>
+    </tr>
+    <tr>
+      <td>&lt;vlm/hybrid&gt;-auto-engine</td>
+      <td>🟡</td>
+    </tr>
+    <tr>
+      <td>&lt;vlm/hybrid&gt;-http-client</td>
+      <td>🟢</td>
+    </tr>
+    <tr>
+      <td colspan="2">openai-server服务（mineru-openai-server）</td>
+      <td>🟢</td>
+    </tr>
+    <tr>
+      <td colspan="2">数据并行 (--data-parallel-size)</td>
+      <td>🔴</td>
+    </tr>
+  </tbody>
+</table>
+
+注：  
+🟢: 支持，运行较稳定，精度与Nvidia GPU基本一致  
+🟡: 支持但较不稳定，在某些场景下可能出现异常，或精度存在一定差异  
+🔴: 不支持，无法运行，或精度存在较大差异
+
+>[!TIP]
+> - Kunlunxin加速卡指定可用加速卡的方式与NVIDIA GPU类似，请参考[使用指定GPU设备](https://opendatalab.github.io/MinerU/zh/usage/advanced_cli_parameters/#cuda_visible_devices)章节说明，
+>   将环境变量`CUDA_VISIBLE_DEVICES`替换为`XPU_VISIBLE_DEVICES`即可。
+> - 在Kunlunxin平台可以通过`xpu-smi`命令查看加速卡的使用情况，并根据需要指定空闲的加速卡ID以避免资源冲突。
\ No newline at end of file
diff --git a/docs/zh/usage/acceleration_cards/MooreThreads.md b/docs/zh/usage/acceleration_cards/MooreThreads.md
index 981fc26c..16d88229 100644
--- a/docs/zh/usage/acceleration_cards/MooreThreads.md
+++ b/docs/zh/usage/acceleration_cards/MooreThreads.md
@@ -27,6 +27,7 @@ docker run -u root --name mineru_docker \
     --shm-size=80g \
     --privileged \
     -e MTHREADS_VISIBLE_DEVICES=all \
+    -e MINERU_VLLM_DEVICE=musa \
     -e MINERU_MODEL_SOURCE=local \
     -it mineru:musa-vllm-latest \
     /bin/bash
diff --git a/mineru/backend/vlm/utils.py b/mineru/backend/vlm/utils.py
index 24b04b90..6ebc3d09 100644
--- a/mineru/backend/vlm/utils.py
+++ b/mineru/backend/vlm/utils.py
@@ -102,4 +102,120 @@ def set_default_batch_size() -> int:
     except Exception as e:
         logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1')
         batch_size = 1
-    return batch_size
\ No newline at end of file
+    return batch_size
+
+
+def _get_device_config(device_type: str) -> dict | None:
+    """Return the vLLM config overrides for a device type (case-insensitive), or None if none are defined."""
+
+    # Per-device configuration definitions
+    DEVICE_CONFIGS = {
+        # "musa": {
+        #     "compilation_config_dict": {
+        #         "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
+        #         "simple_cuda_graph": True
+        #     },
+        #     "block_size": 32,
+        # },
+        "corex": {
+            "compilation_config_dict": {
+                "cudagraph_mode": "FULL_DECODE_ONLY",
+                "level": 0
+            },
+        },
+        "kxpu": {
+            "compilation_config_dict": {
+                "splitting_ops": [
+                    "vllm.unified_attention", "vllm.unified_attention_with_output",
+                    "vllm.unified_attention_with_output_kunlun", "vllm.mamba_mixer2",
+                    "vllm.mamba_mixer", "vllm.short_conv", "vllm.linear_attention",
+                    "vllm.plamo2_mamba_mixer", "vllm.gdn_attention", "vllm.sparse_attn_indexer"
+                ]
+            },
+            "block_size": 128,
+            "dtype": "float16",
+            "distributed_executor_backend": "mp",
+        },
+    }
+
+    return DEVICE_CONFIGS.get(device_type.lower())
+
+
+def _check_server_arg_exists(args: list, arg_name: str) -> bool:
+    """Return True if ``--<arg_name>`` is in args, either as a bare flag or as ``--<arg_name>=value``."""
+    return any(arg == f"--{arg_name}" or arg.startswith(f"--{arg_name}=") for arg in args)
+
+
+def _add_server_arg_if_missing(args: list, arg_name: str, value: str) -> None:
+    """Append ``--<arg_name>`` and ``value`` to args in place unless the flag is already present."""
+    if not _check_server_arg_exists(args, arg_name):
+        args.extend([f"--{arg_name}", value])
+
+
+def _add_engine_kwarg_if_missing(kwargs: dict, key: str, value) -> None:
+    """Set ``kwargs[key] = value`` in place unless ``key`` is already present in kwargs."""
+    if key not in kwargs:
+        kwargs[key] = value
+
+
+def mod_kwargs_by_device_type(kwargs_or_args: dict | list, vllm_mode: str) -> dict | list:
+    """Adjust vLLM configuration parameters for the device named by env var MINERU_VLLM_DEVICE.
+
+    Args:
+        kwargs_or_args: Configuration parameters; a list in server mode, a dict in engine modes.
+        vllm_mode: vLLM run mode ("server", "sync_engine", "async_engine").
+
+    Returns:
+        The same object that was passed in, modified in place (unchanged if the device has no config).
+    """
+    device_type = os.getenv("MINERU_VLLM_DEVICE", "")
+    config = _get_device_config(device_type)
+
+    if config is None:
+        return kwargs_or_args
+
+    if vllm_mode == "server":
+        _apply_server_config(kwargs_or_args, config)
+    else:
+        _apply_engine_config(kwargs_or_args, config, vllm_mode)
+
+    return kwargs_or_args
+
+
+def _apply_server_config(args: list, config: dict) -> None:
+    """Append device-specific CLI flags to the server args in place, skipping flags already present."""
+    import json
+
+    if "compilation_config_dict" in config:
+        _add_server_arg_if_missing(
+            args, "compilation-config",
+            json.dumps(config["compilation_config_dict"], separators=(',', ':'))
+        )
+
+    for key in ["block_size", "dtype", "distributed_executor_backend"]:
+        if key in config:
+            # Convert the key to CLI flag form: block_size -> block-size
+            arg_name = key.replace("_", "-")
+            _add_server_arg_if_missing(args, arg_name, str(config[key]))
+
+
+def _apply_engine_config(kwargs: dict, config: dict, vllm_mode: str) -> None:
+    """Fill missing engine kwargs in place from a device config; keys already in kwargs are kept."""
+    try:
+        from vllm.config import CompilationConfig
+    except ImportError as e:
+        raise ImportError("Please install vllm to use the vllm-engine or vllm-async-engine backend.") from e
+
+    if "compilation_config_dict" in config:
+        config_dict = config["compilation_config_dict"]
+        if vllm_mode == "sync_engine":
+            compilation_config = config_dict  # sync_engine: pass the dict through as-is
+        elif vllm_mode == "async_engine":
+            compilation_config = CompilationConfig(**config_dict)  # async_engine: wrap in a config object
+        else:
+            return  # unrecognized mode: nothing has been added yet, leave kwargs untouched
+        _add_engine_kwarg_if_missing(kwargs, "compilation_config", compilation_config)
+
+    for key in ["block_size", "dtype", "distributed_executor_backend"]:
+        if key in config:
+            _add_engine_kwarg_if_missing(kwargs, key, config[key])
diff --git a/mineru/backend/vlm/vlm_analyze.py b/mineru/backend/vlm/vlm_analyze.py
index 58884d3f..e66658ee 100644
--- a/mineru/backend/vlm/vlm_analyze.py
+++ b/mineru/backend/vlm/vlm_analyze.py
@@ -6,7 +6,7 @@ import json
 from loguru import logger
 
 from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \
-    set_lmdeploy_backend
+    set_lmdeploy_backend, mod_kwargs_by_device_type
 from .model_output_to_middle_json import result_to_middle_json
 from ...data.data_reader_writer import DataWriter
 from mineru.utils.pdf_image_tools import load_images_from_pdf
@@ -101,27 +101,7 @@ class ModelSingleton:
                     except ImportError:
                         raise ImportError("Please install vllm to use the vllm-engine backend.")
 
-                    # musa vllm v1 引擎特殊配置
-                    # device = get_device()
-                    # if device_type.startswith("musa"):
-                    #     import torch
-                    #     if torch.musa.is_available():
-                    #         compilation_config = {
-                    #             "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
-                    #             "simple_cuda_graph": True
-                    #         }
-                    #         block_size = 32
-                    #         kwargs["compilation_config"] = compilation_config
-                    #         kwargs["block_size"] = block_size
-
-                    # corex vllm v1 引擎特殊配置
-                    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
-                    if device_type.lower() == "corex":
-                        compilation_config = {
-                            "cudagraph_mode": "FULL_DECODE_ONLY",
-                            "level": 0
-                        }
-                        kwargs["compilation_config"] = compilation_config
+                    kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="sync_engine")
 
                     if "compilation_config" in kwargs:
                         if isinstance(kwargs["compilation_config"], str):
@@ -148,28 +128,7 @@ class ModelSingleton:
                     except ImportError:
                         raise ImportError("Please install vllm to use the vllm-async-engine backend.")
 
-
-                    # musa vllm v1 引擎特殊配置
-                    # device = get_device()
-                    # if device.startswith("musa"):
-                    #     import torch
-                    #     if torch.musa.is_available():
-                    #         compilation_config = CompilationConfig(
-                    #             cudagraph_capture_sizes=[1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
-                    #             simple_cuda_graph=True
-                    #         )
-                    #         block_size = 32
-                    #         kwargs["compilation_config"] = compilation_config
-                    #         kwargs["block_size"] = block_size
-
-                    # corex vllm v1 引擎特殊配置
-                    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
-                    if device_type.lower() == "corex":
-                        compilation_config = CompilationConfig(
-                            cudagraph_mode="FULL_DECODE_ONLY",
-                            level=0
-                        )
-                        kwargs["compilation_config"] = compilation_config
+                    kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="async_engine")
 
                     if "compilation_config" in kwargs:
                         if isinstance(kwargs["compilation_config"], dict):
diff --git a/mineru/model/vlm/vllm_server.py b/mineru/model/vlm/vllm_server.py
index eb2c3286..8d438f1d 100644
--- a/mineru/model/vlm/vllm_server.py
+++ b/mineru/model/vlm/vllm_server.py
@@ -1,8 +1,8 @@
 import os
 import sys
 
-from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors
-from mineru.utils.config_reader import get_device
+from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors, \
+    mod_kwargs_by_device_type
 from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
 
 from vllm.entrypoints.cli.main import main as vllm_main
@@ -14,8 +14,6 @@ def main():
     has_port_arg = False
     has_gpu_memory_utilization_arg = False
     has_logits_processors_arg = False
-    has_block_size_arg = False
-    has_compilation_config = False
     model_path = None
     model_arg_indices = []
 
@@ -27,10 +25,6 @@ def main():
             has_gpu_memory_utilization_arg = True
         if arg == "--logits-processors" or arg.startswith("--logits-processors="):
             has_logits_processors_arg = True
-        if arg == "--block-size" or arg.startswith("--block-size="):
-            has_block_size_arg = True
-        if arg == "--compilation-config" or arg.startswith("--compilation-config="):
-            has_compilation_config = True
         if arg == "--model":
             if i + 1 < len(args):
                 model_path = args[i + 1]
@@ -57,21 +51,7 @@ def main():
     if (not has_logits_processors_arg) and custom_logits_processors:
         args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
 
-    # musa vllm v1 引擎特殊配置 
-    # device = get_device()
-    # if device.startswith("musa"):
-    #     import torch
-    #     if torch.musa.is_available():
-    #         if not has_block_size_arg:
-    #             args.extend(["--block-size", "32"])
-    #         if not has_compilation_config:
-    #             args.extend(["--compilation-config", '{"cudagraph_capture_sizes": [1,2,3,4,5,6,7,8,10,12,14,16,18,20,24,28,30], "simple_cuda_graph": true}'])
-
-    # corex vllm v1 引擎特殊配置
-    device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
-    if device_type.lower() == "corex":
-        if not has_compilation_config:
-            args.extend(["--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0}'])
+    args = mod_kwargs_by_device_type(args, vllm_mode="server")
 
     # 重构参数，将模型路径作为位置参数
     sys.argv = [sys.argv[0]] + ["serve", model_path] + args