mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
33
docker/china/kxpu.Dockerfile
Normal file
33
docker/china/kxpu.Dockerfile
Normal file
@@ -0,0 +1,33 @@
|
||||
# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + Kunlun XPU.
FROM docker.1ms.run/wjie520/vllm_kunlun:v0.10.1.1rc1

# Install Noto fonts so Chinese characters render correctly in generated output.
# --no-install-recommends keeps the layer small; apt lists are removed in the
# same layer so they never persist in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        fonts-noto-core \
        fonts-noto-cjk \
        fontconfig && \
    fc-cache -fv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install latest mineru and pinned model/runtime dependencies.
# The Aliyun PyPI mirror speeds up installs from within China.
# --no-cache-dir avoids baking the pip wheel cache into the layer
# (replaces the previous trailing `pip cache purge`).
RUN python3 -m pip install -U --no-cache-dir pip -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir "mineru[api,gradio]>=2.7.6" \
        "matplotlib>=3.10,<4" \
        "ultralytics>=8.3.48,<9" \
        "doclayout_yolo==0.0.4" \
        "ftfy>=6.3.1,<7" \
        "shapely>=2.0.7,<3" \
        "pyclipper>=1.3.0,<2" \
        "omegaconf>=2.3.0,<3" \
        -i https://mirrors.aliyun.com/pypi/simple && \
    # Patch the bundled vllm_kunlun Qwen2-VL model: replace `act_layer()` with
    # an explicit nn.GELU() in the first 200 lines — presumably a workaround
    # for an activation construct unsupported on Kunlun XPU (TODO confirm).
    # NOTE: the path is pinned to this base image's conda env and python 3.10;
    # it must be revisited whenever the base image tag changes.
    sed -i '1,200{s/self\.act = act_layer()/self.act = nn.GELU()/;t;b};' /root/miniconda/envs/vllm_kunlun_0.10.1.1/lib/python3.10/site-packages/vllm_kunlun/models/qwen2_vl.py

# Download models and update the configuration file
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"

# Set the entry point: mark models as locally available, then exec the
# user-supplied command so it becomes PID 1 and receives signals.
ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
|
||||
@@ -36,7 +36,7 @@ docker run --name mineru_docker \
|
||||
--security-opt apparmor=unconfined \
|
||||
-e VLLM_ENFORCE_CUDA_GRAPH=1 \
|
||||
-e MINERU_MODEL_SOURCE=local \
|
||||
-e MINERU_LMDEPLOY_DEVICE=corex \
|
||||
-e MINERU_VLLM_DEVICE=corex \
|
||||
-it mineru:corex-vllm-latest \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
127
docs/zh/usage/acceleration_cards/Kunlunxin.md
Normal file
127
docs/zh/usage/acceleration_cards/Kunlunxin.md
Normal file
@@ -0,0 +1,127 @@
|
||||
## 1. 测试平台
|
||||
以下为本指南测试使用的平台信息,供参考:
|
||||
```
|
||||
os: Ubuntu 22.04.5 LTS
|
||||
cpu: Intel x86-64
|
||||
xpu: P800
|
||||
driver: 515.58
|
||||
docker: 20.10.5
|
||||
```
|
||||
|
||||
## 2. 环境准备
|
||||
|
||||
### 2.1 使用 Dockerfile 构建镜像 (vllm)
|
||||
|
||||
```bash
|
||||
wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/kxpu.Dockerfile
|
||||
docker build --network=host -t mineru:kxpu-vllm-latest -f kxpu.Dockerfile .
|
||||
```
|
||||
|
||||
## 3. 启动 Docker 容器
|
||||
|
||||
```bash
|
||||
docker run -u root --name mineru_docker \
|
||||
--device=/dev/xpu0:/dev/xpu0 \
|
||||
--device=/dev/xpu1:/dev/xpu1 \
|
||||
--device=/dev/xpu2:/dev/xpu2 \
|
||||
--device=/dev/xpu3:/dev/xpu3 \
|
||||
--device=/dev/xpu4:/dev/xpu4 \
|
||||
--device=/dev/xpu5:/dev/xpu5 \
|
||||
--device=/dev/xpu6:/dev/xpu6 \
|
||||
--device=/dev/xpu7:/dev/xpu7 \
|
||||
--device=/dev/xpuctrl:/dev/xpuctrl \
|
||||
--net=host \
|
||||
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
|
||||
--tmpfs /dev/shm:rw,nosuid,nodev,exec,size=32g \
|
||||
--cap-add=SYS_PTRACE \
|
||||
-v /home/users/vllm-kunlun:/home/vllm-kunlun \
|
||||
-v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \
|
||||
-w /workspace \
|
||||
-e MINERU_MODEL_SOURCE=local \
|
||||
-e MINERU_FORMULA_CH_SUPPORT=true \
|
||||
-e MINERU_VLLM_DEVICE=kxpu \
|
||||
-it mineru:kxpu-vllm-latest \
|
||||
/bin/bash
|
||||
```
|
||||
|
||||
执行该命令后,您将进入到Docker容器的交互式终端,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。
|
||||
您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。
|
||||
|
||||
|
||||
## 4. 注意事项
|
||||
|
||||
不同环境下,MinerU对Kunlunxin加速卡的支持情况如下表所示:
|
||||
|
||||
>[!TIP]
|
||||
> - `vllm`黄灯问题为不支持`hybrid-auto-engine`模式,`vlm-auto-engine`不受影响。
|
||||
|
||||
<table border="1">
|
||||
<thead>
|
||||
<tr>
|
||||
<th rowspan="2" colspan="2">使用场景</th>
|
||||
<th colspan="2">容器环境</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>vllm</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td rowspan="3">命令行工具(mineru)</td>
|
||||
<td>pipeline</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><vlm/hybrid>-auto-engine</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><vlm/hybrid>-http-client</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="3">fastapi服务(mineru-api)</td>
|
||||
<td>pipeline</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><vlm/hybrid>-auto-engine</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><vlm/hybrid>-http-client</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="3">gradio界面(mineru-gradio)</td>
|
||||
<td>pipeline</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><vlm/hybrid>-auto-engine</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><vlm/hybrid>-http-client</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2">openai-server服务(mineru-openai-server)</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2">数据并行 (--data-parallel-size)</td>
|
||||
<td>🔴</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
注:
|
||||
🟢: 支持,运行较稳定,精度与Nvidia GPU基本一致
|
||||
🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异
|
||||
🔴: 不支持,无法运行,或精度存在较大差异
|
||||
|
||||
>[!TIP]
|
||||
> - Kunlunxin加速卡指定可用加速卡的方式与NVIDIA GPU类似,请参考[使用指定GPU设备](https://opendatalab.github.io/MinerU/zh/usage/advanced_cli_parameters/#cuda_visible_devices)章节说明,
|
||||
>将环境变量`CUDA_VISIBLE_DEVICES`替换为`XPU_VISIBLE_DEVICES`即可。
|
||||
> - 在Kunlunxin平台可以通过`xpu-smi`命令查看加速卡的使用情况,并根据需要指定空闲的加速卡ID以避免资源冲突。
|
||||
@@ -27,6 +27,7 @@ docker run -u root --name mineru_docker \
|
||||
--shm-size=80g \
|
||||
--privileged \
|
||||
-e MTHREADS_VISIBLE_DEVICES=all \
|
||||
-e MINERU_VLLM_DEVICE=musa \
|
||||
-e MINERU_MODEL_SOURCE=local \
|
||||
-it mineru:musa-vllm-latest \
|
||||
/bin/bash
|
||||
|
||||
@@ -127,7 +127,7 @@ docker run --privileged=true \
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2">数据并行 (--data-parallel-size/--dp)</td>
|
||||
<td>🟡</td>
|
||||
<td>🔴</td>
|
||||
<td>🔴</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
|
||||
@@ -10,10 +10,6 @@ docker: 28.0.4
|
||||
|
||||
## 2. 环境准备
|
||||
|
||||
>[!NOTE]
|
||||
>Tecorigin加速卡支持使用`vllm`进行VLM模型推理加速。请根据实际需求选择安装和使用:
|
||||
|
||||
|
||||
### 2.1 下载并加载镜像 (vllm)
|
||||
|
||||
```bash
|
||||
@@ -106,13 +102,9 @@ docker run -dit --name mineru_docker \
|
||||
<td colspan="2">openai-server服务(mineru-openai-server)</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2">张量并行 (--tensor-parallel-size)</td>
|
||||
<td>🟢</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2">数据并行 (--data-parallel-size)</td>
|
||||
<td>🟡</td>
|
||||
<td>🔴</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -105,4 +105,117 @@ def set_default_batch_size() -> int:
|
||||
except Exception as e:
|
||||
logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1')
|
||||
batch_size = 1
|
||||
return batch_size
|
||||
return batch_size
|
||||
|
||||
|
||||
def _get_device_config(device_type: str) -> dict | None:
|
||||
"""获取不同设备类型的配置参数"""
|
||||
|
||||
# 各设备类型的配置定义
|
||||
DEVICE_CONFIGS = {
|
||||
# "musa": {
|
||||
# "compilation_config_dict": {
|
||||
# "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
|
||||
# "simple_cuda_graph": True
|
||||
# },
|
||||
# "block_size": 32,
|
||||
# },
|
||||
"corex": {
|
||||
"compilation_config_dict": {
|
||||
"cudagraph_mode": "FULL_DECODE_ONLY",
|
||||
"level": 0
|
||||
},
|
||||
},
|
||||
"kxpu": {
|
||||
"compilation_config_dict": {
|
||||
"splitting_ops": [
|
||||
"vllm.unified_attention", "vllm.unified_attention_with_output",
|
||||
"vllm.unified_attention_with_output_kunlun", "vllm.mamba_mixer2",
|
||||
"vllm.mamba_mixer", "vllm.short_conv", "vllm.linear_attention",
|
||||
"vllm.plamo2_mamba_mixer", "vllm.gdn_attention", "vllm.sparse_attn_indexer"
|
||||
]
|
||||
},
|
||||
"block_size": 128,
|
||||
"dtype": "float16",
|
||||
"distributed_executor_backend": "mp",
|
||||
},
|
||||
}
|
||||
|
||||
return DEVICE_CONFIGS.get(device_type.lower())
|
||||
|
||||
|
||||
def _check_server_arg_exists(args: list, arg_name: str) -> bool:
|
||||
"""检查命令行参数列表中是否已存在指定参数"""
|
||||
return any(arg == f"--{arg_name}" or arg.startswith(f"--{arg_name}=") for arg in args)
|
||||
|
||||
|
||||
def _add_server_arg_if_missing(args: list, arg_name: str, value: str) -> None:
|
||||
"""如果参数不存在,则添加到命令行参数列表"""
|
||||
if not _check_server_arg_exists(args, arg_name):
|
||||
args.extend([f"--{arg_name}", value])
|
||||
|
||||
|
||||
def _add_engine_kwarg_if_missing(kwargs: dict, key: str, value) -> None:
|
||||
"""如果参数不存在,则添加到 kwargs 字典"""
|
||||
if key not in kwargs:
|
||||
kwargs[key] = value
|
||||
|
||||
|
||||
def mod_kwargs_by_device_type(kwargs_or_args: dict | list, vllm_mode: str) -> dict | list:
    """Patch vllm configuration for the accelerator selected via MINERU_VLLM_DEVICE.

    Args:
        kwargs_or_args: configuration container — a CLI arg list when
            vllm_mode is "server", otherwise an engine kwargs dict.
        vllm_mode: vllm run mode ("server", "sync_engine", "async_engine").

    Returns:
        The (possibly mutated in place) kwargs_or_args object.
    """
    selected_device = os.getenv("MINERU_VLLM_DEVICE", "")
    device_config = _get_device_config(selected_device)

    # Unknown / unset device: nothing to patch.
    if device_config is None:
        return kwargs_or_args

    if vllm_mode == "server":
        _apply_server_config(kwargs_or_args, device_config)
    else:
        _apply_engine_config(kwargs_or_args, device_config, vllm_mode)

    return kwargs_or_args
|
||||
|
||||
|
||||
def _apply_server_config(args: list, config: dict) -> None:
    """Translate a device config dict into vllm server CLI flags (in place).

    "compilation_config_dict" is serialized to compact JSON and passed as
    ``--compilation-config``; every other entry becomes a ``--<key-with-dashes>``
    flag with its stringified value. Flags already supplied by the user are
    never overridden.
    """
    import json

    for config_key, config_value in config.items():
        if config_key == "compilation_config_dict":
            flag_name = "compilation-config"
            flag_value = json.dumps(config_value, separators=(',', ':'))
        else:
            # Convert key format: block_size -> block-size
            flag_name = config_key.replace("_", "-")
            flag_value = str(config_value)
        _add_server_arg_if_missing(args, flag_name, flag_value)
|
||||
|
||||
|
||||
def _apply_engine_config(kwargs: dict, config: dict, vllm_mode: str) -> None:
    """Translate a device config dict into vllm engine kwargs (in place).

    "compilation_config_dict" is passed through as a plain dict for
    "sync_engine", wrapped in vllm's CompilationConfig for "async_engine",
    and skipped for any other mode. Other entries are copied verbatim.
    Caller-supplied kwargs are never overridden.

    Raises:
        ImportError: when vllm is not installed.
    """
    try:
        from vllm.config import CompilationConfig
    except ImportError:
        raise ImportError("Please install vllm to use the vllm-async-engine backend.")

    for config_key, config_value in config.items():
        if config_key != "compilation_config_dict":
            _add_engine_kwarg_if_missing(kwargs, config_key, config_value)
            continue

        if vllm_mode == "sync_engine":
            resolved_compilation_config = config_value
        elif vllm_mode == "async_engine":
            resolved_compilation_config = CompilationConfig(**config_value)
        else:
            # Unknown engine mode: do not inject a compilation config.
            continue
        _add_engine_kwarg_if_missing(kwargs, "compilation_config", resolved_compilation_config)
|
||||
|
||||
@@ -6,7 +6,7 @@ import json
|
||||
from loguru import logger
|
||||
|
||||
from .utils import enable_custom_logits_processors, set_default_gpu_memory_utilization, set_default_batch_size, \
|
||||
set_lmdeploy_backend
|
||||
set_lmdeploy_backend, mod_kwargs_by_device_type
|
||||
from .model_output_to_middle_json import result_to_middle_json
|
||||
from ...data.data_reader_writer import DataWriter
|
||||
from mineru.utils.pdf_image_tools import load_images_from_pdf
|
||||
@@ -101,27 +101,7 @@ class ModelSingleton:
|
||||
except ImportError:
|
||||
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
||||
|
||||
# musa vllm v1 引擎特殊配置
|
||||
# device = get_device()
|
||||
# if device_type.startswith("musa"):
|
||||
# import torch
|
||||
# if torch.musa.is_available():
|
||||
# compilation_config = {
|
||||
# "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
|
||||
# "simple_cuda_graph": True
|
||||
# }
|
||||
# block_size = 32
|
||||
# kwargs["compilation_config"] = compilation_config
|
||||
# kwargs["block_size"] = block_size
|
||||
|
||||
# corex vllm v1 引擎特殊配置
|
||||
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
||||
if device_type.lower() == "corex":
|
||||
compilation_config = {
|
||||
"cudagraph_mode": "FULL_DECODE_ONLY",
|
||||
"level": 0
|
||||
}
|
||||
kwargs["compilation_config"] = compilation_config
|
||||
kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="sync_engine")
|
||||
|
||||
if "compilation_config" in kwargs:
|
||||
if isinstance(kwargs["compilation_config"], str):
|
||||
@@ -148,28 +128,7 @@ class ModelSingleton:
|
||||
except ImportError:
|
||||
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
|
||||
|
||||
|
||||
# musa vllm v1 引擎特殊配置
|
||||
# device = get_device()
|
||||
# if device.startswith("musa"):
|
||||
# import torch
|
||||
# if torch.musa.is_available():
|
||||
# compilation_config = CompilationConfig(
|
||||
# cudagraph_capture_sizes=[1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
|
||||
# simple_cuda_graph=True
|
||||
# )
|
||||
# block_size = 32
|
||||
# kwargs["compilation_config"] = compilation_config
|
||||
# kwargs["block_size"] = block_size
|
||||
|
||||
# corex vllm v1 引擎特殊配置
|
||||
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
||||
if device_type.lower() == "corex":
|
||||
compilation_config = CompilationConfig(
|
||||
cudagraph_mode="FULL_DECODE_ONLY",
|
||||
level=0
|
||||
)
|
||||
kwargs["compilation_config"] = compilation_config
|
||||
kwargs = mod_kwargs_by_device_type(kwargs, vllm_mode="async_engine")
|
||||
|
||||
if "compilation_config" in kwargs:
|
||||
if isinstance(kwargs["compilation_config"], dict):
|
||||
|
||||
@@ -89,7 +89,11 @@ class FormulaRecognizer(BaseOCRV20):
|
||||
return rec_formula
|
||||
|
||||
def batch_predict(
|
||||
self, images_mfd_res: list, images: list, batch_size: int = 64
|
||||
self,
|
||||
images_mfd_res: list,
|
||||
images: list,
|
||||
batch_size: int = 64,
|
||||
interline_enable: bool = True,
|
||||
) -> list:
|
||||
images_formula_list = []
|
||||
mf_image_list = []
|
||||
@@ -105,6 +109,8 @@ class FormulaRecognizer(BaseOCRV20):
|
||||
for idx, (xyxy, conf, cla) in enumerate(
|
||||
zip(mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls)
|
||||
):
|
||||
if not interline_enable and cla.item() == 1:
|
||||
continue # Skip interline regions if not enabled
|
||||
xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
|
||||
new_item = {
|
||||
"category_id": 13 + int(cla.item()),
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors
|
||||
from mineru.utils.config_reader import get_device
|
||||
from mineru.backend.vlm.utils import set_default_gpu_memory_utilization, enable_custom_logits_processors, \
|
||||
mod_kwargs_by_device_type
|
||||
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
||||
|
||||
from vllm.entrypoints.cli.main import main as vllm_main
|
||||
@@ -14,8 +14,6 @@ def main():
|
||||
has_port_arg = False
|
||||
has_gpu_memory_utilization_arg = False
|
||||
has_logits_processors_arg = False
|
||||
has_block_size_arg = False
|
||||
has_compilation_config = False
|
||||
model_path = None
|
||||
model_arg_indices = []
|
||||
|
||||
@@ -27,10 +25,6 @@ def main():
|
||||
has_gpu_memory_utilization_arg = True
|
||||
if arg == "--logits-processors" or arg.startswith("--logits-processors="):
|
||||
has_logits_processors_arg = True
|
||||
if arg == "--block-size" or arg.startswith("--block-size="):
|
||||
has_block_size_arg = True
|
||||
if arg == "--compilation-config" or arg.startswith("--compilation-config="):
|
||||
has_compilation_config = True
|
||||
if arg == "--model":
|
||||
if i + 1 < len(args):
|
||||
model_path = args[i + 1]
|
||||
@@ -57,21 +51,7 @@ def main():
|
||||
if (not has_logits_processors_arg) and custom_logits_processors:
|
||||
args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
|
||||
|
||||
# musa vllm v1 引擎特殊配置
|
||||
# device = get_device()
|
||||
# if device.startswith("musa"):
|
||||
# import torch
|
||||
# if torch.musa.is_available():
|
||||
# if not has_block_size_arg:
|
||||
# args.extend(["--block-size", "32"])
|
||||
# if not has_compilation_config:
|
||||
# args.extend(["--compilation-config", '{"cudagraph_capture_sizes": [1,2,3,4,5,6,7,8,10,12,14,16,18,20,24,28,30], "simple_cuda_graph": true}'])
|
||||
|
||||
# corex vllm v1 引擎特殊配置
|
||||
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
||||
if device_type.lower() == "corex":
|
||||
if not has_compilation_config:
|
||||
args.extend(["--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0}'])
|
||||
args = mod_kwargs_by_device_type(args, vllm_mode="server")
|
||||
|
||||
# 重构参数,将模型路径作为位置参数
|
||||
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
||||
|
||||
Reference in New Issue
Block a user