Merge pull request #4500 from myhloli/dev

Dev
This commit is contained in:
Xiaomeng Zhao
2026-02-05 15:46:32 +08:00
committed by GitHub
10 changed files with 291 additions and 80 deletions

View File

@@ -0,0 +1,33 @@
# Base image containing the vLLM inference environment; requires an amd64 (x86-64) CPU and a Kunlun XPU.
FROM docker.1ms.run/wjie520/vllm_kunlun:v0.10.1.1rc1
# Install Noto fonts for Chinese characters
RUN apt-get update && \
apt-get install -y \
fonts-noto-core \
fonts-noto-cjk \
fontconfig && \
fc-cache -fv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install the latest mineru
RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
python3 -m pip install "mineru[api,gradio]>=2.7.6" \
"matplotlib>=3.10,<4" \
"ultralytics>=8.3.48,<9" \
"doclayout_yolo==0.0.4" \
"ftfy>=6.3.1,<7" \
"shapely>=2.0.7,<3" \
"pyclipper>=1.3.0,<2" \
"omegaconf>=2.3.0,<3" \
-i https://mirrors.aliyun.com/pypi/simple && \
sed -i '1,200{s/self\.act = act_layer()/self.act = nn.GELU()/;t;b};' /root/miniconda/envs/vllm_kunlun_0.10.1.1/lib/python3.10/site-packages/vllm_kunlun/models/qwen2_vl.py && \
python3 -m pip cache purge
# Download models and update the configuration file
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
# Set the entry point to activate the virtual environment and run the command line tool
ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]

View File

@@ -36,7 +36,7 @@ docker run --name mineru_docker \
--security-opt apparmor=unconfined \
-e VLLM_ENFORCE_CUDA_GRAPH=1 \
-e MINERU_MODEL_SOURCE=local \
-e MINERU_LMDEPLOY_DEVICE=corex \
-e MINERU_VLLM_DEVICE=corex \
-it mineru:corex-vllm-latest \
/bin/bash
```

View File

@@ -0,0 +1,127 @@
## 1. Test Platform
The following platform information was used when testing this guide, for reference:
```
os: Ubuntu 22.04.5 LTS
cpu: Intel x86-64
xpu: P800
driver: 515.58
docker: 20.10.5
```
## 2. Environment Setup
### 2.1 Build the vllm image with the Dockerfile
```bash
wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/kxpu.Dockerfile
docker build --network=host -t mineru:kxpu-vllm-latest -f kxpu.Dockerfile .
```
## 3. Start the Docker Container
```bash
docker run -u root --name mineru_docker \
--device=/dev/xpu0:/dev/xpu0 \
--device=/dev/xpu1:/dev/xpu1 \
--device=/dev/xpu2:/dev/xpu2 \
--device=/dev/xpu3:/dev/xpu3 \
--device=/dev/xpu4:/dev/xpu4 \
--device=/dev/xpu5:/dev/xpu5 \
--device=/dev/xpu6:/dev/xpu6 \
--device=/dev/xpu7:/dev/xpu7 \
--device=/dev/xpuctrl:/dev/xpuctrl \
--net=host \
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
--tmpfs /dev/shm:rw,nosuid,nodev,exec,size=32g \
--cap-add=SYS_PTRACE \
-v /home/users/vllm-kunlun:/home/vllm-kunlun \
-v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \
-w /workspace \
-e MINERU_MODEL_SOURCE=local \
-e MINERU_FORMULA_CH_SUPPORT=true \
-e MINERU_VLLM_DEVICE=kxpu \
-it mineru:kxpu-vllm-latest \
/bin/bash
```
After running this command, you will enter an interactive terminal inside the Docker container, where you can run MinerU commands directly to use its features.
You can also start a MinerU service directly by replacing `/bin/bash` with a service startup command; see [Start the service via commands](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver) for details.
## 4. Notes
MinerU's support for Kunlunxin accelerator cards in different environments is shown in the table below:
>[!TIP]
> - The yellow-light status for `vllm` means the `hybrid-auto-engine` mode is not supported; `vlm-auto-engine` is unaffected.
<table border="1">
<thead>
<tr>
<th rowspan="2" colspan="2">Use case</th>
<th colspan="2">Container environment</th>
</tr>
<tr>
<th>vllm</th>
</tr>
</thead>
<tbody>
<tr>
<td rowspan="3">Command-line tool (mineru)</td>
<td>pipeline</td>
<td>🟢</td>
</tr>
<tr>
<td>&lt;vlm/hybrid&gt;-auto-engine</td>
<td>🟢</td>
</tr>
<tr>
<td>&lt;vlm/hybrid&gt;-http-client</td>
<td>🟢</td>
</tr>
<tr>
<td rowspan="3">FastAPI service (mineru-api)</td>
<td>pipeline</td>
<td>🟢</td>
</tr>
<tr>
<td>&lt;vlm/hybrid&gt;-auto-engine</td>
<td>🟢</td>
</tr>
<tr>
<td>&lt;vlm/hybrid&gt;-http-client</td>
<td>🟢</td>
</tr>
<tr>
<td rowspan="3">Gradio interface (mineru-gradio)</td>
<td>pipeline</td>
<td>🟢</td>
</tr>
<tr>
<td>&lt;vlm/hybrid&gt;-auto-engine</td>
<td>🟢</td>
</tr>
<tr>
<td>&lt;vlm/hybrid&gt;-http-client</td>
<td>🟢</td>
</tr>
<tr>
<td colspan="2">openai-server service (mineru-openai-server)</td>
<td>🟢</td>
</tr>
<tr>
<td colspan="2">Data parallelism (--data-parallel-size)</td>
<td>🔴</td>
</tr>
</tbody>
</table>
Notes:
🟢: Supported; runs stably, with accuracy essentially on par with Nvidia GPUs
🟡: Supported but less stable; may fail in some scenarios, or accuracy may differ somewhat
🔴: Not supported; cannot run, or accuracy differs significantly
>[!TIP]
> - Selecting specific Kunlunxin accelerator cards works the same way as with NVIDIA GPUs; see the [Using specified GPU devices](https://opendatalab.github.io/MinerU/zh/usage/advanced_cli_parameters/#cuda_visible_devices) section,
>replacing the `CUDA_VISIBLE_DEVICES` environment variable with `XPU_VISIBLE_DEVICES`.
> - On the Kunlunxin platform, you can use the `xpu-smi` command to check accelerator usage and specify idle card IDs as needed to avoid resource conflicts.
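As a minimal sketch of the device-selection tip above (card ids here are purely illustrative), the environment variable is a comma-separated list of card indices that the process reads, just like `CUDA_VISIBLE_DEVICES` on NVIDIA:

```python
import os

# Expose only cards 0 and 1 to this process; the Kunlunxin runtime reads
# XPU_VISIBLE_DEVICES the same way the CUDA runtime reads CUDA_VISIBLE_DEVICES.
os.environ["XPU_VISIBLE_DEVICES"] = "0,1"

visible = os.environ["XPU_VISIBLE_DEVICES"].split(",")
print(visible)  # ['0', '1']
```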

View File

@@ -27,6 +27,7 @@ docker run -u root --name mineru_docker \
--shm-size=80g \
--privileged \
-e MTHREADS_VISIBLE_DEVICES=all \
-e MINERU_VLLM_DEVICE=musa \
-e MINERU_MODEL_SOURCE=local \
-it mineru:musa-vllm-latest \
/bin/bash

View File

@@ -127,7 +127,7 @@ docker run --privileged=true \
</tr>
<tr>
<td colspan="2">Data parallelism (--data-parallel-size/--dp)</td>
<td>🟡</td>
<td>🔴</td>
<td>🔴</td>
</tr>
</tbody>

View File

@@ -10,10 +10,6 @@ docker: 28.0.4
## 2. 环境准备
>[!NOTE]
>Tecorigin accelerator cards support `vllm`-accelerated VLM model inference. Choose what to install and use based on your actual needs:
### 2.1 Download and load the vllm image
```bash
@@ -106,13 +102,9 @@ docker run -dit --name mineru_docker \
<td colspan="2">openai-server service (mineru-openai-server)</td>
<td>🟢</td>
</tr>
<tr>
<td colspan="2">Tensor parallelism (--tensor-parallel-size)</td>
<td>🟢</td>
</tr>
<tr>
<td colspan="2">Data parallelism (--data-parallel-size)</td>
<td>🟡</td>
<td>🔴</td>
</tr>
</tbody>
</table>

View File

@@ -105,4 +105,117 @@ def set_default_batch_size() -> int:
except Exception as e:
logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1')
batch_size = 1
return batch_size
def _get_device_config(device_type: str) -> dict | None:
"""Get configuration parameters for each device type."""
# Configuration definitions for each device type
DEVICE_CONFIGS = {
# "musa": {
# "compilation_config_dict": {
# "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
# "simple_cuda_graph": True
# },
# "block_size": 32,
# },
"corex": {
"compilation_config_dict": {
"cudagraph_mode": "FULL_DECODE_ONLY",
"level": 0
},
},
"kxpu": {
"compilation_config_dict": {
"splitting_ops": [
"vllm.unified_attention", "vllm.unified_attention_with_output",
"vllm.unified_attention_with_output_kunlun", "vllm.mamba_mixer2",
"vllm.mamba_mixer", "vllm.short_conv", "vllm.linear_attention",
"vllm.plamo2_mamba_mixer", "vllm.gdn_attention", "vllm.sparse_attn_indexer"
]
},
"block_size": 128,
"dtype": "float16",
"distributed_executor_backend": "mp",
},
}
return DEVICE_CONFIGS.get(device_type.lower())
def _check_server_arg_exists(args: list, arg_name: str) -> bool:
"""Check whether the given argument already exists in the CLI argument list."""
return any(arg == f"--{arg_name}" or arg.startswith(f"--{arg_name}=") for arg in args)
def _add_server_arg_if_missing(args: list, arg_name: str, value: str) -> None:
"""Append the argument to the CLI argument list if it is not already present."""
if not _check_server_arg_exists(args, arg_name):
args.extend([f"--{arg_name}", value])
def _add_engine_kwarg_if_missing(kwargs: dict, key: str, value) -> None:
"""Add the key to the kwargs dict if it is not already present."""
if key not in kwargs:
kwargs[key] = value
def mod_kwargs_by_device_type(kwargs_or_args: dict | list, vllm_mode: str) -> dict | list:
"""Modify vllm configuration parameters based on the device type.
Args:
kwargs_or_args: configuration parameters; a list in server mode, a dict in engine mode
vllm_mode: the vllm run mode ("server", "sync_engine", "async_engine")
Returns:
The modified configuration parameters
"""
device_type = os.getenv("MINERU_VLLM_DEVICE", "")
config = _get_device_config(device_type)
if config is None:
return kwargs_or_args
if vllm_mode == "server":
_apply_server_config(kwargs_or_args, config)
else:
_apply_engine_config(kwargs_or_args, config, vllm_mode)
return kwargs_or_args
def _apply_server_config(args: list, config: dict) -> None:
"""Apply server-mode configuration."""
import json
for key, value in config.items():
if key == "compilation_config_dict":
_add_server_arg_if_missing(
args, "compilation-config",
json.dumps(value, separators=(',', ':'))
)
else:
# Convert key format: block_size -> block-size
arg_name = key.replace("_", "-")
_add_server_arg_if_missing(args, arg_name, str(value))
def _apply_engine_config(kwargs: dict, config: dict, vllm_mode: str) -> None:
"""Apply engine-mode configuration."""
try:
from vllm.config import CompilationConfig
except ImportError:
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
for key, value in config.items():
if key == "compilation_config_dict":
if vllm_mode == "sync_engine":
compilation_config = value
elif vllm_mode == "async_engine":
compilation_config = CompilationConfig(**value)
else:
continue
_add_engine_kwarg_if_missing(kwargs, "compilation_config", compilation_config)
else:
_add_engine_kwarg_if_missing(kwargs, key, value)
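The server-mode merge logic above can be sketched standalone. The helpers below mirror `_check_server_arg_exists` / `_add_server_arg_if_missing` outside of MinerU; the flag names and values are illustrative only:

```python
import json

def arg_exists(args, name):
    # A flag counts as present either as "--name value" or "--name=value".
    return any(a == f"--{name}" or a.startswith(f"--{name}=") for a in args)

def add_if_missing(args, name, value):
    # Only append when the user has not already supplied the flag.
    if not arg_exists(args, name):
        args.extend([f"--{name}", value])

# The user already pinned block-size, so only the compilation config is appended.
args = ["--block-size=64"]
add_if_missing(args, "block-size", "128")
add_if_missing(args, "compilation-config",
               json.dumps({"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0},
                          separators=(',', ':')))
print(args)
# → ['--block-size=64', '--compilation-config', '{"cudagraph_mode":"FULL_DECODE_ONLY","level":0}']
```

User-supplied flags always win: the helper never overwrites an existing value, it only fills in device defaults.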

View File

@@ -6,7 +6,7 @@ import json
from loguru import logger
from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \
set_lmdeploy_backend
set_lmdeploy_backend, mod_kwargs_by_device_type
from .model_output_to_middle_json import result_to_middle_json
from ...data.data_reader_writer import DataWriter
from mineru.utils.pdf_image_tools import load_images_from_pdf
@@ -101,27 +101,7 @@ class ModelSingleton:
except ImportError:
raise ImportError("Please install vllm to use the vllm-engine backend.")
# Special configuration for the musa vllm v1 engine
# device = get_device()
# if device_type.startswith("musa"):
# import torch
# if torch.musa.is_available():
# compilation_config = {
# "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
# "simple_cuda_graph": True
# }
# block_size = 32
# kwargs["compilation_config"] = compilation_config
# kwargs["block_size"] = block_size
# Special configuration for the corex vllm v1 engine
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
if device_type.lower() == "corex":
compilation_config = {
"cudagraph_mode": "FULL_DECODE_ONLY",
"level": 0
}
kwargs["compilation_config"] = compilation_config
kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="sync_engine")
if "compilation_config" in kwargs:
if isinstance(kwargs["compilation_config"], str):
@@ -148,28 +128,7 @@ class ModelSingleton:
except ImportError:
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
# Special configuration for the musa vllm v1 engine
# device = get_device()
# if device.startswith("musa"):
# import torch
# if torch.musa.is_available():
# compilation_config = CompilationConfig(
# cudagraph_capture_sizes=[1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
# simple_cuda_graph=True
# )
# block_size = 32
# kwargs["compilation_config"] = compilation_config
# kwargs["block_size"] = block_size
# Special configuration for the corex vllm v1 engine
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
if device_type.lower() == "corex":
compilation_config = CompilationConfig(
cudagraph_mode="FULL_DECODE_ONLY",
level=0
)
kwargs["compilation_config"] = compilation_config
kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="async_engine")
if "compilation_config" in kwargs:
if isinstance(kwargs["compilation_config"], dict):

View File

@@ -89,7 +89,11 @@ class FormulaRecognizer(BaseOCRV20):
return rec_formula
def batch_predict(
self, images_mfd_res: list, images: list, batch_size: int = 64
self,
images_mfd_res: list,
images: list,
batch_size: int = 64,
interline_enable: bool = True,
) -> list:
images_formula_list = []
mf_image_list = []
@@ -105,6 +109,8 @@ class FormulaRecognizer(BaseOCRV20):
for idx, (xyxy, conf, cla) in enumerate(
zip(mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls)
):
if not interline_enable and cla.item() == 1:
continue # Skip interline regions if not enabled
xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
new_item = {
"category_id": 13 + int(cla.item()),

View File

@@ -1,8 +1,8 @@
import os
import sys
from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors
from mineru.utils.config_reader import get_device
from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors, \
mod_kwargs_by_device_type
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
from vllm.entrypoints.cli.main import main as vllm_main
@@ -14,8 +14,6 @@ def main():
has_port_arg = False
has_gpu_memory_utilization_arg = False
has_logits_processors_arg = False
has_block_size_arg = False
has_compilation_config = False
model_path = None
model_arg_indices = []
@@ -27,10 +25,6 @@ def main():
has_gpu_memory_utilization_arg = True
if arg == "--logits-processors" or arg.startswith("--logits-processors="):
has_logits_processors_arg = True
if arg == "--block-size" or arg.startswith("--block-size="):
has_block_size_arg = True
if arg == "--compilation-config" or arg.startswith("--compilation-config="):
has_compilation_config = True
if arg == "--model":
if i + 1 < len(args):
model_path = args[i + 1]
@@ -57,21 +51,7 @@ def main():
if (not has_logits_processors_arg) and custom_logits_processors:
args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
# Special configuration for the musa vllm v1 engine
# device = get_device()
# if device.startswith("musa"):
# import torch
# if torch.musa.is_available():
# if not has_block_size_arg:
# args.extend(["--block-size", "32"])
# if not has_compilation_config:
# args.extend(["--compilation-config", '{"cudagraph_capture_sizes": [1,2,3,4,5,6,7,8,10,12,14,16,18,20,24,28,30], "simple_cuda_graph": true}'])
# Special configuration for the corex vllm v1 engine
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
if device_type.lower() == "corex":
if not has_compilation_config:
args.extend(["--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0}'])
args = mod_kwargs_by_device_type(args, vllm_mode="server")
# Rebuild the arguments, passing the model path as a positional argument
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
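The final argv rebuild above can be sketched as follows; the program name and model path here are placeholders, not real paths from the repository:

```python
def build_serve_argv(prog: str, model_path: str, args: list) -> list:
    # vllm's CLI expects: <prog> serve <model> [flags...],
    # with the model path as a positional argument after the subcommand.
    return [prog, "serve", model_path] + args

argv = build_serve_argv("mineru-vllm-server", "/models/MinerU-VL", ["--port", "30000"])
print(argv)  # ['mineru-vllm-server', 'serve', '/models/MinerU-VL', '--port', '30000']
```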