mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
feat: add Dockerfile for corex environment setup and update vllm server configurations
This commit is contained in:
27
docker/china/corex.Dockerfile
Normal file
27
docker/china/corex.Dockerfile
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Base image containing the vLLM inference environment, requiring amd64 (x86-64) CPU + Iluvatar GPU.
|
||||||
|
FROM
|
||||||
|
|
||||||
|
|
||||||
|
# Install Noto fonts for Chinese characters
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
fonts-noto-core \
|
||||||
|
fonts-noto-cjk \
|
||||||
|
fontconfig && \
|
||||||
|
fc-cache -fv && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install mineru latest
|
||||||
|
RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
|
||||||
|
python3 -m pip install 'mineru[core]>=2.7.4' \
|
||||||
|
numpy==1.26.4 \
|
||||||
|
opencv-python==4.11.0.86 \
|
||||||
|
-i https://mirrors.aliyun.com/pypi/simple && \
|
||||||
|
python3 -m pip cache purge
|
||||||
|
|
||||||
|
# Download models and update the configuration file
|
||||||
|
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
|
||||||
|
|
||||||
|
# Set the entry point to activate the virtual environment and run the command line tool
|
||||||
|
ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
FROM harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-1226-das1.7-py3.10-20251226
|
FROM harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-1226-das1.7-py3.10-20251226
|
||||||
|
|
||||||
|
|
||||||
# Install libgl for opencv support & Noto fonts for Chinese characters
|
# Install Noto fonts for Chinese characters
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y \
|
apt-get install -y \
|
||||||
fonts-noto-core \
|
fonts-noto-core \
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/gcu:docker_images_topsrider_i3x_3.6.20260106_vllm0.11_pytorch2.8.0
|
FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/gcu:docker_images_topsrider_i3x_3.6.20260106_vllm0.11_pytorch2.8.0
|
||||||
|
|
||||||
|
|
||||||
# Install libgl for opencv support & Noto fonts for Chinese characters
|
# Install Noto fonts for Chinese characters
|
||||||
RUN echo 'deb http://mirrors.aliyun.com/ubuntu/ noble main restricted universe multiverse\n\
|
RUN echo 'deb http://mirrors.aliyun.com/ubuntu/ noble main restricted universe multiverse\n\
|
||||||
deb http://mirrors.aliyun.com/ubuntu/ noble-updates main restricted universe multiverse\n\
|
deb http://mirrors.aliyun.com/ubuntu/ noble-updates main restricted universe multiverse\n\
|
||||||
deb http://mirrors.aliyun.com/ubuntu/ noble-backports main restricted universe multiverse\n\
|
deb http://mirrors.aliyun.com/ubuntu/ noble-backports main restricted universe multiverse\n\
|
||||||
|
|||||||
120
docs/zh/usage/acceleration_cards/Iluvatar.md
Normal file
120
docs/zh/usage/acceleration_cards/Iluvatar.md
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
## 1. 测试平台
|
||||||
|
以下为本指南测试使用的平台信息,供参考:
|
||||||
|
```
|
||||||
|
os: Ubuntu 22.04.5 LTS
|
||||||
|
cpu: Intel x86-64
|
||||||
|
gpu: Iluvatar BI-V150
|
||||||
|
driver: 4.4.0
|
||||||
|
docker: 28.1.1
|
||||||
|
```
|
||||||
|
|
||||||
|
## 2. 环境准备
|
||||||
|
|
||||||
|
### 2.1 使用 Dockerfile 构建镜像
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/corex.Dockerfile
|
||||||
|
docker build --network=host -t mineru:corex-vllm-latest -f corex.Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## 3. 启动 Docker 容器
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --name mineru_docker \
|
||||||
|
-v /usr/src:/usr/src \
|
||||||
|
-v /lib/modules:/lib/modules \
|
||||||
|
-v /dev:/dev \
|
||||||
|
--privileged \
|
||||||
|
--cap-add=ALL \
|
||||||
|
--pid=host \
|
||||||
|
--group-add video \
|
||||||
|
--network=host \
|
||||||
|
--shm-size '400gb' \
|
||||||
|
--ulimit memlock=-1 \
|
||||||
|
--security-opt seccomp=unconfined \
|
||||||
|
--security-opt apparmor=unconfined \
|
||||||
|
-e VLLM_ENFORCE_CUDA_GRAPH=1 \
|
||||||
|
-e MINERU_MODEL_SOURCE=local \
|
||||||
|
-e MINERU_LMDEPLOY_DEVICE=corex \
|
||||||
|
-it mineru:corex-vllm-latest \
|
||||||
|
/bin/bash
|
||||||
|
```
|
||||||
|
|
||||||
|
执行该命令后,您将进入到Docker容器的交互式终端,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。
|
||||||
|
您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。
|
||||||
|
|
||||||
|
|
||||||
|
## 4. 注意事项
|
||||||
|
|
||||||
|
不同环境下,MinerU对Iluvatar(天数智芯)加速卡的支持情况如下表所示:
|
||||||
|
|
||||||
|
<table border="1">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th rowspan="2" colspan="2">使用场景</th>
|
||||||
|
<th colspan="2">容器环境</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>vllm</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td rowspan="3">命令行工具(mineru)</td>
|
||||||
|
<td>pipeline</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><vlm/hybrid>-auto-engine</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><vlm/hybrid>-http-client</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td rowspan="3">fastapi服务(mineru-api)</td>
|
||||||
|
<td>pipeline</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><vlm/hybrid>-auto-engine</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><vlm/hybrid>-http-client</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td rowspan="3">gradio界面(mineru-gradio)</td>
|
||||||
|
<td>pipeline</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><vlm/hybrid>-auto-engine</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><vlm/hybrid>-http-client</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2">openai-server服务(mineru-openai-server)</td>
|
||||||
|
<td>🟢</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2">数据并行 (--data-parallel-size)</td>
|
||||||
|
<td>🔴</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
注:
|
||||||
|
🟢: 支持,运行较稳定,精度与Nvidia GPU基本一致
|
||||||
|
🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异
|
||||||
|
🔴: 不支持,无法运行,或精度存在较大差异
|
||||||
|
|
||||||
|
>[!TIP]
|
||||||
|
>Iluvatar加速卡指定可用加速卡的方式与NVIDIA GPU类似,请参考[使用指定GPU设备](https://opendatalab.github.io/MinerU/zh/usage/advanced_cli_parameters/#cuda_visible_devices)章节说明,
|
||||||
|
>Iluvatar Corex 软件栈兼容 CUDA,通常可直接使用环境变量`CUDA_VISIBLE_DEVICES`指定可见设备(请以天数智芯官方文档为准)。
|
||||||
@@ -297,7 +297,14 @@ def ocr_det_batch_setting(device):
|
|||||||
# 检测torch的版本号
|
# 检测torch的版本号
|
||||||
import torch
|
import torch
|
||||||
from packaging import version
|
from packaging import version
|
||||||
if version.parse(torch.__version__) >= version.parse("2.8.0") or str(device).startswith('mps'):
|
|
||||||
|
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
||||||
|
|
||||||
|
if (
|
||||||
|
version.parse(torch.__version__) >= version.parse("2.8.0")
|
||||||
|
or str(device).startswith('mps')
|
||||||
|
or device_type.lower() in ["corex"]
|
||||||
|
):
|
||||||
enable_ocr_det_batch = False
|
enable_ocr_det_batch = False
|
||||||
else:
|
else:
|
||||||
enable_ocr_det_batch = True
|
enable_ocr_det_batch = True
|
||||||
|
|||||||
@@ -193,7 +193,12 @@ def batch_image_analyze(
|
|||||||
# 检测torch的版本号
|
# 检测torch的版本号
|
||||||
import torch
|
import torch
|
||||||
from packaging import version
|
from packaging import version
|
||||||
if version.parse(torch.__version__) >= version.parse("2.8.0") or str(device).startswith('mps'):
|
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
||||||
|
if (
|
||||||
|
version.parse(torch.__version__) >= version.parse("2.8.0")
|
||||||
|
or str(device).startswith('mps')
|
||||||
|
or device_type.lower() in ["corex"]
|
||||||
|
):
|
||||||
enable_ocr_det_batch = False
|
enable_ocr_det_batch = False
|
||||||
else:
|
else:
|
||||||
enable_ocr_det_batch = True
|
enable_ocr_det_batch = True
|
||||||
|
|||||||
@@ -101,20 +101,27 @@ class ModelSingleton:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
||||||
|
|
||||||
"""
|
|
||||||
# musa vllm v1 引擎特殊配置
|
# musa vllm v1 引擎特殊配置
|
||||||
device = get_device()
|
# device = get_device()
|
||||||
if device.startswith("musa"):
|
# if device_type.startswith("musa"):
|
||||||
import torch
|
# import torch
|
||||||
if torch.musa.is_available():
|
# if torch.musa.is_available():
|
||||||
compilation_config = {
|
# compilation_config = {
|
||||||
"cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
|
# "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
|
||||||
"simple_cuda_graph": True
|
# "simple_cuda_graph": True
|
||||||
}
|
# }
|
||||||
block_size = 32
|
# block_size = 32
|
||||||
kwargs["compilation_config"] = compilation_config
|
# kwargs["compilation_config"] = compilation_config
|
||||||
kwargs["block_size"] = block_size
|
# kwargs["block_size"] = block_size
|
||||||
"""
|
|
||||||
|
# corex vllm v1 引擎特殊配置
|
||||||
|
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
||||||
|
if device_type.lower() == "corex":
|
||||||
|
compilation_config = {
|
||||||
|
"cudagraph_mode": "FULL_DECODE_ONLY",
|
||||||
|
"level": 0
|
||||||
|
}
|
||||||
|
kwargs["compilation_config"] = compilation_config
|
||||||
|
|
||||||
if "compilation_config" in kwargs:
|
if "compilation_config" in kwargs:
|
||||||
if isinstance(kwargs["compilation_config"], str):
|
if isinstance(kwargs["compilation_config"], str):
|
||||||
|
|||||||
@@ -56,17 +56,22 @@ def main():
|
|||||||
model_path = auto_download_and_get_model_root_path("/", "vlm")
|
model_path = auto_download_and_get_model_root_path("/", "vlm")
|
||||||
if (not has_logits_processors_arg) and custom_logits_processors:
|
if (not has_logits_processors_arg) and custom_logits_processors:
|
||||||
args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
|
args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
|
||||||
"""
|
|
||||||
# musa vllm v1 引擎特殊配置
|
# musa vllm v1 引擎特殊配置
|
||||||
device = get_device()
|
# device = get_device()
|
||||||
if device.startswith("musa"):
|
# if device.startswith("musa"):
|
||||||
import torch
|
# import torch
|
||||||
if torch.musa.is_available():
|
# if torch.musa.is_available():
|
||||||
if not has_block_size_arg:
|
# if not has_block_size_arg:
|
||||||
args.extend(["--block-size", "32"])
|
# args.extend(["--block-size", "32"])
|
||||||
if not has_compilation_config:
|
# if not has_compilation_config:
|
||||||
args.extend(["--compilation-config", '{"cudagraph_capture_sizes": [1,2,3,4,5,6,7,8,10,12,14,16,18,20,24,28,30], "simple_cuda_graph": true}'])
|
# args.extend(["--compilation-config", '{"cudagraph_capture_sizes": [1,2,3,4,5,6,7,8,10,12,14,16,18,20,24,28,30], "simple_cuda_graph": true}'])
|
||||||
"""
|
|
||||||
|
# corex vllm v1 引擎特殊配置
|
||||||
|
device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
|
||||||
|
if device_type.lower() == "corex":
|
||||||
|
if not has_compilation_config:
|
||||||
|
args.extend(["--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0}'])
|
||||||
|
|
||||||
# 重构参数,将模型路径作为位置参数
|
# 重构参数,将模型路径作为位置参数
|
||||||
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
||||||
|
|||||||
Reference in New Issue
Block a user