Compare commits

...

70 Commits

Author SHA1 Message Date
Xiaomeng Zhao
13c23c475d Merge pull request #2681 from opendatalab/dev
Dev
2025-06-17 12:06:33 +08:00
Xiaomeng Zhao
039cf27fd5 Merge pull request #2680 from myhloli/dev
fix: include model_path in key for backend model retrieval in vlm_analyze.py
2025-06-17 12:05:13 +08:00
myhloli
72c946a9ec fix: include model_path in key for backend model retrieval in vlm_analyze.py 2025-06-17 12:03:07 +08:00
Xiaomeng Zhao
1bec25b1e1 Merge pull request #2679 from myhloli/dev
fix: support NPU device in UnimernetModel initialization
2025-06-17 11:26:08 +08:00
myhloli
2785f60424 fix: support NPU device in UnimernetModel initialization 2025-06-17 11:24:43 +08:00
myhloli
3cdcd76c34 docs: update changelog for version 2.0.4 and add installation instructions for mineru client 2025-06-17 11:21:46 +08:00
Xiaomeng Zhao
252255166f Merge pull request #2676 from hotelll/patch-1
correct the chat template of sglang_server
2025-06-17 11:16:58 +08:00
He Tianyao
8deb3b5253 correct server chat template
MinerU2.0's chat template during training has an additional "\n" compared with chatml's. This difference may slightly affect performance in server mode, because sglang_server uses sglang's chat template.
2025-06-17 10:56:54 +08:00
Xiaomeng Zhao
f34644eb61 Merge pull request #2675 from myhloli/dev
add new enum values and improve MIN_BATCH_INFERENCE_SIZE documentation in pipeline_analyze.py
2025-06-17 02:04:04 +08:00
myhloli
5b88eba7a1 Merge remote-tracking branch 'origin/dev' into dev 2025-06-17 02:01:35 +08:00
myhloli
58b8e8a912 fix: add new enum values and improve MIN_BATCH_INFERENCE_SIZE documentation in pipeline_analyze.py 2025-06-17 02:01:24 +08:00
Xiaomeng Zhao
7960d8b799 Merge pull request #2668 from myhloli/dev
fix: include server_url in model key for backend model retrieval in vlm_analyze.py
2025-06-16 16:59:17 +08:00
myhloli
20dcbd2164 fix: include server_url in model key for backend model retrieval in vlm_analyze.py 2025-06-16 16:58:09 +08:00
Xiaomeng Zhao
e4e58fa2de Merge pull request #2667 from myhloli/dev
fix: refine model path condition for transformers and sglang-engine backends in vlm_analyze.py
2025-06-16 16:52:13 +08:00
myhloli
013ddc02b2 fix: refine model path condition for transformers and sglang-engine backends in vlm_analyze.py 2025-06-16 16:50:52 +08:00
Xiaomeng Zhao
390ddd8b96 Merge pull request #2660 from opendatalab/dev
fix: correct syntax error in demo.py for VLM client backend
2025-06-15 18:21:24 +08:00
Xiaomeng Zhao
a1e377be05 Merge pull request #2659 from myhloli/dev
fix: correct syntax error in demo.py for VLM client backend
2025-06-15 18:20:05 +08:00
myhloli
b7d7a1bf99 fix: correct syntax error in demo.py for VLM client backend 2025-06-15 18:19:19 +08:00
Xiaomeng Zhao
d71c7f3a7d Merge pull request #2656 from opendatalab/master
master->dev
2025-06-15 11:32:07 +08:00
Xiaomeng Zhao
2abcc43493 Merge pull request #2655 from opendatalab/dev
update doc
2025-06-15 11:29:37 +08:00
Xiaomeng Zhao
f07ac1c8a2 Merge pull request #2654 from myhloli/dev
update doc
2025-06-15 11:29:04 +08:00
myhloli
af6a2166fe fix: improve clarity in changelog entries for feature toggle issues and local deployment 2025-06-15 11:27:42 +08:00
myhloli
0f7d960885 Update version.py with new version 2025-06-15 03:12:31 +00:00
Xiaomeng Zhao
9f0008acff Merge pull request #2653 from opendatalab/release-2.0.3
Release 2.0.3
2025-06-15 11:10:10 +08:00
Xiaomeng Zhao
ccd2a71fbb Merge pull request #2652 from myhloli/dev
fix: update sglang version to 0.4.7 and adjust changelog for compatibility issues
2025-06-15 11:07:17 +08:00
myhloli
e1181ba814 fix: update sglang version to 0.4.7 and adjust changelog for compatibility issues 2025-06-15 11:03:07 +08:00
myhloli
6717712e91 Merge remote-tracking branch 'origin/dev' into dev 2025-06-15 10:57:52 +08:00
myhloli
a49c605f12 fix: update request handling to pass state from send_one_request to wait_one_response 2025-06-15 10:57:30 +08:00
Xiaomeng Zhao
740c5f6f5c Merge pull request #2650 from opendatalab/master
master->dev
2025-06-15 03:28:45 +08:00
myhloli
8230995a50 Update version.py with new version 2025-06-14 19:12:08 +00:00
Xiaomeng Zhao
8cf86e3818 Merge pull request #2649 from opendatalab/release-2.0.2
Release 2.0.2
2025-06-15 03:10:30 +08:00
Xiaomeng Zhao
648bd66056 Merge pull request #2648 from myhloli/dev
Dev
2025-06-15 03:07:16 +08:00
myhloli
55cb4c9064 feat: add Docker Compose configuration and update README for container startup 2025-06-15 03:05:54 +08:00
myhloli
f51c1acc04 docs: update README files with Docker run command for GPU support 2025-06-15 02:40:08 +08:00
myhloli
1dc7d603dd fix: add handling for copyright symbol in regex replacements 2025-06-15 02:24:38 +08:00
myhloli
61c1bb9258 fix: remove unused import of torch in client.py 2025-06-15 02:20:15 +08:00
myhloli
4e2a562231 fix: simplify formula enable handling by removing redundant function call 2025-06-15 02:16:28 +08:00
myhloli
1383787bad fix: refactor formula and table enable handling to use environment variables 2025-06-15 02:05:30 +08:00
myhloli
9b279553df chore: update changelog for version 2.0.2 release with bug fixes and Dockerfile updates 2025-06-15 01:50:59 +08:00
myhloli
4cb28fdf96 fix: update Dockerfile and README files to use updated mineru installation commands 2025-06-15 01:39:08 +08:00
myhloli
f2ff3472a0 fix: update Dockerfile to use sglang base image and adjust mineru installation 2025-06-15 01:03:57 +08:00
myhloli
1fa55b7629 Merge remote-tracking branch 'origin/dev' into dev 2025-06-15 00:52:26 +08:00
myhloli
98b8c4a967 refactor: streamline formula and table enable configurations in the pipeline 2025-06-15 00:51:13 +08:00
Xiaomeng Zhao
f1997b49b2 Merge pull request #2646 from opendatalab/master
master->dev
2025-06-14 22:29:23 +08:00
myhloli
52de921c11 Update version.py with new version 2025-06-14 14:28:04 +00:00
Xiaomeng Zhao
19a31cce39 Merge pull request #2645 from opendatalab/release-2.0.1
Release 2.0.1
2025-06-14 22:24:05 +08:00
Xiaomeng Zhao
d77d5edddb Merge pull request #2644 from myhloli/dev
fix: update configure_model to use environment variable for config file name
2025-06-14 22:22:20 +08:00
myhloli
a812ae899e Merge remote-tracking branch 'origin/dev' into dev 2025-06-14 22:20:18 +08:00
myhloli
1793bdfc7d fix: update configure_model to use environment variable for config file name 2025-06-14 22:20:07 +08:00
Xiaomeng Zhao
823063d743 Merge pull request #2639 from opendatalab/dev
Update Acknowledgments
2025-06-14 10:29:02 +08:00
Xiaomeng Zhao
6e54a68cef Merge pull request #2638 from myhloli/dev
fix: add pdftext link to README and README_zh-CN for completeness
2025-06-14 10:27:55 +08:00
myhloli
6e39928204 fix: add pdftext link to README and README_zh-CN for completeness 2025-06-14 10:26:49 +08:00
Xiaomeng Zhao
5c8f6b84ce Merge pull request #2636 from opendatalab/dev
update docs
2025-06-13 22:18:21 +08:00
Xiaomeng Zhao
52537958ec Merge pull request #2635 from myhloli/dev
fix: update Table of Contents in README and README_zh-CN for clarity and consistency
2025-06-13 22:17:26 +08:00
myhloli
d8989ed116 fix: update Table of Contents in README and README_zh-CN for clarity and consistency 2025-06-13 22:15:53 +08:00
github-actions[bot]
be80200a82 @Ar-Hyk has signed the CLA in opendatalab/MinerU#2634 2025-06-13 14:02:32 +00:00
Xiaomeng Zhao
9669111faf Merge pull request #2633 from myhloli/dev
fix: revert json_url in configure_model to use original MinerU template
2025-06-13 21:09:23 +08:00
myhloli
fdca2c8ef0 Merge remote-tracking branch 'origin/dev' into dev 2025-06-13 21:08:20 +08:00
myhloli
91208fb1bd fix: revert json_url in configure_model to use original MinerU template 2025-06-13 21:08:11 +08:00
Xiaomeng Zhao
3376f3a7d9 Merge pull request #2632 from opendatalab/master
master->dev
2025-06-13 21:05:32 +08:00
Xiaomeng Zhao
c5480b9d39 Merge pull request #2631 from opendatalab/release-2.0.0
Release 2.0.0
2025-06-13 20:38:26 +08:00
myhloli
97c1362e3c Update version.py with new version 2025-06-13 12:31:41 +00:00
Xiaomeng Zhao
28588d7c65 Merge pull request #2628 from opendatalab/release-2.0.0
Release 2.0.0
2025-06-13 20:29:04 +08:00
Xiaomeng Zhao
6ab123487b Merge pull request #2625 from opendatalab/release-2.0.0
Release 2.0.0
2025-06-13 20:21:52 +08:00
github-actions[bot]
9487d33d7b @YanzhenHuang has signed the CLA in opendatalab/MinerU#2620 2025-06-13 04:22:59 +00:00
github-actions[bot]
46f7e0f532 @AdrianWangs has signed the CLA in opendatalab/MinerU#2578 2025-06-05 11:30:53 +00:00
github-actions[bot]
efba5d4594 @PairZhu has signed the CLA in opendatalab/MinerU#2566 2025-06-04 02:39:52 +00:00
github-actions[bot]
a911c29fbb @liuzhenghua has signed the CLA in opendatalab/MinerU#2550 2025-05-30 02:57:16 +00:00
github-actions[bot]
0ac5623ad6 @seedclaimer has signed the CLA in opendatalab/MinerU#2536 2025-05-28 12:50:37 +00:00
Xiaomeng Zhao
113a3ad91f Create SECURITY.md 2025-05-28 11:22:55 +08:00
23 changed files with 337 additions and 214 deletions

133
README.md
View File

@@ -51,6 +51,14 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
</div>
# Changelog
- 2025/06/17 2.0.4 Released
- Fixed the issue where models were still required to be downloaded in the `sglang-client` mode
- Fixed the issue where only the first instance would take effect when attempting to launch multiple `sglang-client` instances via multiple URLs within the same process
- 2025/06/15 2.0.3 Released
- Fixed a configuration file key-value update error that occurred when downloading model type was set to `all`
- Fixed the issue where the formula and table feature toggle switches were not working in `command line mode`, causing the features to remain enabled
- Fixed compatibility issues with sglang version 0.4.7 in the `sglang-engine` mode
- Updated Dockerfile and installation documentation for deploying the full version of MinerU in an sglang environment
- 2025/06/13 2.0.0 Released
- MinerU 2.0 represents a comprehensive reconstruction and upgrade from architecture to functionality, delivering a more streamlined design, enhanced performance, and more flexible user experience.
- **New Architecture**: MinerU 2.0 has been deeply restructured in code organization and interaction methods, significantly improving system usability, maintainability, and extensibility.
@@ -347,48 +355,38 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
<details>
<summary>2024/07/05 Initial open-source release</summary>
</details>
</details>
<!-- TABLE OF CONTENT -->
<!-- TABLE OF CONTENT -->
<details open="open">
<summary><h2 style="display: inline-block">Table of Contents</h2></summary>
<ol>
<li>
<a href="#mineru">MinerU</a>
<ul>
<li><a href="#project-introduction">Project Introduction</a></li>
<li><a href="#key-features">Key Features</a></li>
<li><a href="#quick-start">Quick Start</a>
<ul>
<li><a href="#online-demo">Online Demo</a></li>
<li><a href="#quick-cpu-demo">Quick CPU Demo</a></li>
<li><a href="#using-gpu">Using GPU</a></li>
<li><a href="#using-npu">Using NPU</a></li>
</ul>
</li>
<li><a href="#usage">Usage</a>
<ul>
<li><a href="#command-line">Command Line</a></li>
<li><a href="#api">API</a></li>
<li><a href="#deploy-derived-projects">Deploy Derived Projects</a></li>
<li><a href="#development-guide">Development Guide</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#todo">TODO</a></li>
<li><a href="#known-issues">Known Issues</a></li>
<li><a href="#faq">FAQ</a></li>
<li><a href="#all-thanks-to-our-contributors">All Thanks To Our Contributors</a></li>
<li><a href="#license-information">License Information</a></li>
<li><a href="#acknowledgments">Acknowledgments</a></li>
<li><a href="#citation">Citation</a></li>
<li><a href="#star-history">Star History</a></li>
<li><a href="#magic-doc">Magic-doc</a></li>
<li><a href="#magic-html">Magic-html</a></li>
<li><a href="#links">Links</a></li>
</ol>
</details>
<details open="open">
<summary><h2 style="display: inline-block">Table of Contents</h2></summary>
<ol>
<li>
<a href="#mineru">MinerU</a>
<ul>
<li><a href="#project-introduction">Project Introduction</a></li>
<li><a href="#key-features">Key Features</a></li>
<li><a href="#quick-start">Quick Start</a>
<ul>
<li><a href="#online-demo">Online Demo</a></li>
<li><a href="#local-deployment">Local Deployment</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#todo">TODO</a></li>
<li><a href="#known-issues">Known Issues</a></li>
<li><a href="#faq">FAQ</a></li>
<li><a href="#all-thanks-to-our-contributors">All Thanks To Our Contributors</a></li>
<li><a href="#license-information">License Information</a></li>
<li><a href="#acknowledgments">Acknowledgments</a></li>
<li><a href="#citation">Citation</a></li>
<li><a href="#star-history">Star History</a></li>
<li><a href="#magic-doc">Magic-doc</a></li>
<li><a href="#magic-html">Magic-html</a></li>
<li><a href="#links">Links</a></li>
</ol>
</details>
# MinerU
@@ -492,7 +490,7 @@ There are three different ways to experience MinerU:
```bash
pip install --upgrade pip
pip install uv
uv pip install "mineru[core]>=2.0.0"
uv pip install -U "mineru[core]"
```
#### 1.2 Install from source
@@ -503,18 +501,47 @@ cd MinerU
uv pip install -e .[core]
```
#### 1.3 Install full version (with sglang acceleration)
#### 1.3 Install the Full Version (Supports sglang Acceleration)
To use **sglang acceleration for VLM model inference**, install the full version:
If you need to use **sglang to accelerate VLM model inference**, you can choose any of the following methods to install the full version:
- Install using uv or pip:
```bash
uv pip install -U "mineru[all]"
```
- Install from source:
```bash
uv pip install -e .[all]
```
- Build image using Dockerfile:
```bash
wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/global/Dockerfile
docker build -t mineru-sglang:latest -f Dockerfile .
```
Start Docker container:
```bash
docker run --gpus all \
--shm-size 32g \
-p 30000:30000 \
--ipc=host \
mineru-sglang:latest \
mineru-sglang-server --host 0.0.0.0 --port 30000
```
Or start using Docker Compose:
```bash
wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/compose.yaml
docker compose -f compose.yaml up -d
```
> [!TIP]
> The Dockerfile uses `lmsysorg/sglang:v0.4.7-cu124` as the default base image. If necessary, you can modify it to another platform version.
#### 1.4 Install client (for connecting to sglang-server on edge devices that require only CPU and network connectivity)
```bash
uv pip install "mineru[all]>=2.0.0"
```
Or install from source:
```bash
uv pip install -e .[all]
uv pip install -U mineru
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
```
---
@@ -639,7 +666,8 @@ mineru-sglang-server --port 30000
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://127.0.0.1:30000
```
> 💡 For more information about output files, please refer to [Output File Documentation](docs/output_file_en_us.md)
> [!TIP]
> For more information about output files, please refer to [Output File Documentation](docs/output_file_en_us.md)
---
@@ -717,6 +745,7 @@ Currently, some models in this project are trained based on YOLO. However, since
- [xy-cut](https://github.com/Sanster/xy-cut)
- [fast-langdetect](https://github.com/LlmKira/fast-langdetect)
- [pypdfium2](https://github.com/pypdfium2-team/pypdfium2)
- [pdftext](https://github.com/datalab-to/pdftext)
- [pdfminer.six](https://github.com/pdfminer/pdfminer.six)
- [pypdf](https://github.com/py-pdf/pypdf)

View File

@@ -50,6 +50,14 @@
</div>
# 更新记录
- 2025/06/17 2.0.4发布
- 修复了`sglang-client`模式下依然需要下载模型的问题
- 修复了同一进程内尝试通过多个url启动多个`sglang-client`实例时,只有第一个生效的问题
- 2025/06/15 2.0.3发布
- 修复了当下载模型类型设置为`all`时,配置文件出现键值更新错误的问题
- 修复了命令行模式下公式和表格功能开关不生效导致功能无法关闭的问题
- 修复了`sglang-engine`模式下0.4.7版本sglang的兼容性问题
- 更新了sglang环境下部署完整版MinerU的Dockerfile和相关安装文档
- 2025/06/13 2.0.0发布
- MinerU 2.0 是一次从架构到功能的全面重构与升级,带来了更简洁的设计、更强的性能以及更灵活的使用体验。
- **全新架构**:MinerU 2.0 在代码结构和交互方式上进行了深度重构,显著提升了系统的易用性、可维护性与扩展能力。
@@ -336,49 +344,38 @@
<details>
<summary>2024/07/05 首次开源</summary>
</details>
<!-- TABLE OF CONTENT -->
<details open="open">
<summary><h2 style="display: inline-block">文档目录</h2></summary>
<ol>
<li>
<a href="#mineru">MinerU</a>
<ul>
<li><a href="#项目简介">项目简介</a></li>
<li><a href="#主要功能">主要功能</a></li>
<li><a href="#快速开始">快速开始</a>
<ul>
<li><a href="#在线体验">在线体验</a></li>
<li><a href="#使用CPU快速体验">使用CPU快速体验</a></li>
<li><a href="#使用GPU">使用GPU</a></li>
<li><a href="#使用NPU">使用NPU</a></li>
</ul>
</li>
<li><a href="#使用">使用方式</a>
<ul>
<li><a href="#命令行">命令行</a></li>
<li><a href="#api">API</a></li>
<li><a href="#部署衍生项目">部署衍生项目</a></li>
<li><a href="#二次开发">二次开发</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#todo">TODO</a></li>
<li><a href="#known-issues">Known Issues</a></li>
<li><a href="#faq">FAQ</a></li>
<li><a href="#all-thanks-to-our-contributors">Contributors</a></li>
<li><a href="#license-information">License Information</a></li>
<li><a href="#acknowledgments">Acknowledgements</a></li>
<li><a href="#citation">Citation</a></li>
<li><a href="#star-history">Star History</a></li>
<li><a href="#magic-doc">magic-doc快速提取PPT/DOC/PDF</a></li>
<li><a href="#magic-html">magic-html提取混合网页内容</a></li>
<li><a href="#links">Links</a></li>
</ol>
</details>
</details>
<!-- TABLE OF CONTENT -->
<details open="open">
<summary><h2 style="display: inline-block">文档目录</h2></summary>
<ol>
<li>
<a href="#mineru">MinerU</a>
<ul>
<li><a href="#项目简介">项目简介</a></li>
<li><a href="#主要功能">主要功能</a></li>
<li><a href="#快速开始">快速开始</a>
<ul>
<li><a href="#在线体验">在线体验</a></li>
<li><a href="#本地部署">本地部署</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#todo">TODO</a></li>
<li><a href="#known-issues">Known Issues</a></li>
<li><a href="#faq">FAQ</a></li>
<li><a href="#all-thanks-to-our-contributors">Contributors</a></li>
<li><a href="#license-information">License Information</a></li>
<li><a href="#acknowledgments">Acknowledgements</a></li>
<li><a href="#citation">Citation</a></li>
<li><a href="#star-history">Star History</a></li>
<li><a href="#magic-doc">magic-doc快速提取PPT/DOC/PDF</a></li>
<li><a href="#magic-html">magic-html提取混合网页内容</a></li>
<li><a href="#links">Links</a></li>
</ol>
</details>
@@ -483,7 +480,7 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
```bash
pip install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple
pip install uv -i https://mirrors.aliyun.com/pypi/simple
uv pip install "mineru[core]>=2.0.0" -i https://mirrors.aliyun.com/pypi/simple
uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple
```
#### 1.2 源码安装
@@ -496,16 +493,44 @@ uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
#### 1.3 安装完整版(支持 sglang 加速)
如需使用 **sglang 加速 VLM 模型推理**,请安装完整版本:
如需使用 **sglang 加速 VLM 模型推理**,请选择合适的方式安装完整版本:
- 使用uv或pip安装:
```bash
uv pip install -U "mineru[all]" -i https://mirrors.aliyun.com/pypi/simple
```
- 从源码安装:
```bash
uv pip install -e .[all] -i https://mirrors.aliyun.com/pypi/simple
```
- 使用 Dockerfile 构建镜像:
```bash
wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/Dockerfile
docker build -t mineru-sglang:latest -f Dockerfile .
```
启动 Docker 容器:
```bash
docker run --gpus all \
--shm-size 32g \
-p 30000:30000 \
--ipc=host \
mineru-sglang:latest \
mineru-sglang-server --host 0.0.0.0 --port 30000
```
或使用 Docker Compose 启动:
```bash
wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/compose.yaml
docker compose -f compose.yaml up -d
```
> [!TIP]
> Dockerfile默认使用`lmsysorg/sglang:v0.4.7-cu124`作为基础镜像,如有需要,您可以自行修改为其他平台版本。
#### 1.4 安装client(用于在仅需 CPU 和网络连接的边缘设备上连接 sglang-server)
```bash
uv pip install "mineru[all]>=2.0.0" -i https://mirrors.aliyun.com/pypi/simple
```
或从源码安装:
```bash
uv pip install -e .[all] -i https://mirrors.aliyun.com/pypi/simple
uv pip install -U mineru -i https://mirrors.aliyun.com/pypi/simple
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
```
---
@@ -630,7 +655,8 @@ mineru-sglang-server --port 30000
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://127.0.0.1:30000
```
> 💡 更多关于输出文件的信息,请参考 [输出文件说明](docs/output_file_zh_cn.md)
> [!TIP]
> 更多关于输出文件的信息,请参考 [输出文件说明](docs/output_file_zh_cn.md)
---
@@ -710,6 +736,7 @@ mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://127.0.0.1
- [xy-cut](https://github.com/Sanster/xy-cut)
- [fast-langdetect](https://github.com/LlmKira/fast-langdetect)
- [pypdfium2](https://github.com/pypdfium2-team/pypdfium2)
- [pdftext](https://github.com/datalab-to/pdftext)
- [pdfminer.six](https://github.com/pdfminer/pdfminer.six)
- [pypdf](https://github.com/py-pdf/pypdf)

31
SECURITY.md Normal file
View File

@@ -0,0 +1,31 @@
# Security Policy
## Supported Versions
latest
## Reporting a Vulnerability
Please do not report security vulnerabilities through public GitHub issues.
Instead, please report them at https://github.com/opendatalab/MinerU/security.
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
## Preferred Languages
We prefer all communications to be in English or Chinese.
## Policy
We will fix security issues in the project's own code as quickly as possible. Until a fix has been released, please do not disclose the vulnerability on any public platform.

View File

@@ -240,4 +240,4 @@ if __name__ == '__main__':
"""To enable VLM mode, change the backend to 'vlm-xxx'"""
# parse_doc(doc_path_list, output_dir, backend="vlm-transformers") # more general.
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-engine") # faster(engine).
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-client", server_url="http://127.0.0.1:30000" # faster(client).
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-client", server_url="http://127.0.0.1:30000") # faster(client).

View File

@@ -1,33 +1,8 @@
# Use the official Ubuntu base image
FROM ubuntu:22.04
# Set environment variables to non-interactive to avoid prompts during installation
ENV DEBIAN_FRONTEND=noninteractive
# Update the package list and install necessary packages
RUN apt-get update && \
apt-get install -y \
software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y \
python3.10 \
python3.10-venv \
python3.10-distutils \
python3-pip \
wget \
git \
libgl1 \
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Set Python 3.10 as the default python3
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
# Use the official sglang image
FROM lmsysorg/sglang:v0.4.7-cu124
# install mineru latest
RUN /bin/bash -c "pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install uv -i https://mirrors.aliyun.com/pypi/simple && \
uv pip install 'mineru[all]>=2.0.0' -i https://mirrors.aliyun.com/pypi/simple"
RUN python3 -m pip install -U 'mineru[core]' -i https://mirrors.aliyun.com/pypi/simple --break-system-packages
# Download models and update the configuration file
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"

26
docker/compose.yaml Normal file
View File

@@ -0,0 +1,26 @@
services:
mineru-sglang:
image: mineru-sglang:latest
container_name: mineru-sglang
restart: always
ports:
- 30000:30000
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-sglang-server
command:
--host 0.0.0.0
--port 30000
ulimits:
memlock: -1
stack: 67108864
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]

View File

@@ -1,33 +1,8 @@
# Use the official Ubuntu base image
FROM ubuntu:22.04
# Set environment variables to non-interactive to avoid prompts during installation
ENV DEBIAN_FRONTEND=noninteractive
# Update the package list and install necessary packages
RUN apt-get update && \
apt-get install -y \
software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y \
python3.10 \
python3.10-venv \
python3.10-distutils \
python3-pip \
wget \
git \
libgl1 \
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Set Python 3.10 as the default python3
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
# Use the official sglang image
FROM lmsysorg/sglang:v0.4.7-cu124
# install mineru latest
RUN /bin/bash -c "pip3 install --upgrade pip && \
pip3 install uv && \
uv pip install 'mineru[all]>=2.0.0'"
RUN python3 -m pip install -U 'mineru[core]' --break-system-packages
# Download models and update the configuration file
RUN /bin/bash -c "mineru-models-download -s huggingface -m all"

View File

@@ -5,6 +5,7 @@ from collections import defaultdict
import numpy as np
from .model_init import AtomModelSingleton
from ...utils.config_reader import get_formula_enable, get_table_enable
from ...utils.model_utils import crop_img, get_res_list_from_layout_res
from ...utils.ocr_utils import get_adjusted_mfdetrec_res, get_ocr_result_list, OcrConfidence
@@ -16,8 +17,8 @@ MFR_BASE_BATCH_SIZE = 16
class BatchAnalyze:
def __init__(self, model_manager, batch_ratio: int, formula_enable, table_enable, enable_ocr_det_batch: bool = True):
self.batch_ratio = batch_ratio
self.formula_enable = formula_enable
self.table_enable = table_enable
self.formula_enable = get_formula_enable(formula_enable)
self.table_enable = get_table_enable(table_enable)
self.model_manager = model_manager
self.enable_ocr_det_batch = enable_ocr_det_batch

View File

@@ -4,7 +4,7 @@ import time
from loguru import logger
from tqdm import tqdm
from mineru.utils.config_reader import get_device, get_llm_aided_config
from mineru.utils.config_reader import get_device, get_llm_aided_config, get_formula_enable
from mineru.backend.pipeline.model_init import AtomModelSingleton
from mineru.backend.pipeline.para_split import para_split
from mineru.utils.block_pre_proc import prepare_block_bboxes, process_groups
@@ -165,6 +165,7 @@ def page_model_info_to_page_info(page_model_info, image_dict, page, image_writer
def result_to_middle_json(model_list, images_list, pdf_doc, image_writer, lang=None, ocr_enable=False, formula_enabled=True):
middle_json = {"pdf_info": [], "_backend":"pipeline", "_version_name": __version__}
formula_enabled = get_formula_enable(formula_enabled)
for page_index, page_model_info in tqdm(enumerate(model_list), total=len(model_list), desc="Processing pages"):
page = pdf_doc[page_index]
image_dict = images_list[page_index]

View File

@@ -5,7 +5,7 @@ import PIL.Image
import torch
from .model_init import MineruPipelineModel
from mineru.utils.config_reader import get_device, get_formula_config, get_table_recog_config
from mineru.utils.config_reader import get_device
from ...utils.pdf_classify import classify
from ...utils.pdf_image_tools import load_images_from_pdf
@@ -44,20 +44,15 @@ class ModelSingleton:
def custom_model_init(
lang=None,
formula_enable=None,
table_enable=None,
formula_enable=True,
table_enable=True,
):
model_init_start = time.time()
# 从配置文件读取model-dir和device
device = get_device()
formula_config = get_formula_config()
if formula_enable is not None:
formula_config['enable'] = formula_enable
table_config = get_table_recog_config()
if table_enable is not None:
table_config['enable'] = table_enable
formula_config = {"enable": formula_enable}
table_config = {"enable": table_enable}
model_input = {
'device': device,
@@ -78,10 +73,14 @@ def doc_analyze(
pdf_bytes_list,
lang_list,
parse_method: str = 'auto',
formula_enable=None,
table_enable=None,
formula_enable=True,
table_enable=True,
):
MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
"""
适当调大MIN_BATCH_INFERENCE_SIZE可以提高性能可能会增加显存使用量
可通过环境变量MINERU_MIN_BATCH_INFERENCE_SIZE设置默认值为100。
"""
min_batch_inference_size = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
# 收集所有页面信息
all_pages_info = [] # 存储(dataset_index, page_index, img, ocr, lang, width, height)
@@ -114,7 +113,7 @@ def doc_analyze(
# 准备批处理
images_with_extra_info = [(info[2], info[3], info[4]) for info in all_pages_info]
batch_size = MIN_BATCH_INFERENCE_SIZE
batch_size = min_batch_inference_size
batch_images = [
images_with_extra_info[i:i + batch_size]
for i in range(0, len(images_with_extra_info), batch_size)
@@ -152,8 +151,8 @@ def doc_analyze(
def batch_image_analyze(
images_with_extra_info: List[Tuple[PIL.Image.Image, bool, str]],
formula_enable=None,
table_enable=None):
formula_enable=True,
table_enable=True):
# os.environ['CUDA_VISIBLE_DEVICES'] = str(idx)
from .batch_analyze import BatchAnalyze

View File

@@ -27,9 +27,9 @@ class ModelSingleton:
model_path: str | None,
server_url: str | None,
) -> BasePredictor:
key = (backend,)
key = (backend, model_path, server_url)
if key not in self._models:
if not model_path:
if backend in ['transformers', 'sglang-engine'] and not model_path:
model_path = auto_download_and_get_model_root_path("/","vlm")
self._models[key] = get_predictor(
backend=backend,

View File

@@ -2,7 +2,6 @@
import os
import click
from pathlib import Path
import torch
from loguru import logger
from mineru.utils.config_reader import get_device
@@ -140,10 +139,6 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
def main(input_path, output_dir, method, backend, lang, server_url, start_page_id, end_page_id, formula_enable, table_enable, device_mode, virtual_vram, model_source):
if os.getenv('MINERU_FORMULA_ENABLE', None) is None:
os.environ['MINERU_FORMULA_ENABLE'] = str(formula_enable).lower()
if os.getenv('MINERU_TABLE_ENABLE', None) is None:
os.environ['MINERU_TABLE_ENABLE'] = str(table_enable).lower()
def get_device_mode() -> str:
if device_mode is not None:
return device_mode
@@ -184,6 +179,8 @@ def main(input_path, output_dir, method, backend, lang, server_url, start_page_i
p_lang_list=lang_list,
backend=backend,
parse_method=method,
p_formula_enable=formula_enable,
p_table_enable=table_enable,
server_url=server_url,
start_page_id=start_page_id,
end_page_id=end_page_id

View File

@@ -115,6 +115,7 @@ def do_parse(
pdf_doc = all_pdf_docs[idx]
_lang = lang_list[idx]
_ocr_enable = ocr_enabled_list[idx]
middle_json = pipeline_result_to_middle_json(model_list, images_list, pdf_doc, image_writer, _lang, _ocr_enable, p_formula_enable)
pdf_info = middle_json["pdf_info"]

View File

@@ -42,9 +42,8 @@ def download_and_modify_json(url, local_filename, modifications):
def configure_model(model_dir, model_type):
"""配置模型"""
# json_url = 'https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/mineru.template.json'
json_url = 'https://gcore.jsdelivr.net/gh/myhloli/Magic-PDF@dev/mineru.template.json'
config_file_name = 'mineru.json'
json_url = 'https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/mineru.template.json'
config_file_name = os.getenv('MINERU_TOOLS_CONFIG_JSON', 'mineru.json')
home_dir = os.path.expanduser('~')
config_file = os.path.join(home_dir, config_file_name)
@@ -120,13 +119,13 @@ def download_models(model_source, model_type):
click.echo(f"Downloading model: {model_path}")
download_finish_path = auto_download_and_get_model_root_path(model_path, repo_mode='pipeline')
click.echo(f"Pipeline models downloaded successfully to: {download_finish_path}")
configure_model(download_finish_path, model_type)
configure_model(download_finish_path, "pipeline")
def download_vlm_models():
"""下载VLM模型"""
download_finish_path = auto_download_and_get_model_root_path("/", repo_mode='vlm')
click.echo(f"VLM models downloaded successfully to: {download_finish_path}")
configure_model(download_finish_path, model_type)
configure_model(download_finish_path, "vlm")
try:
if model_type == 'pipeline':

View File

@@ -21,7 +21,7 @@ class MathDataset(Dataset):
class UnimernetModel(object):
def __init__(self, weight_dir, _device_="cpu"):
from .unimernet_hf import UnimernetModel
if _device_.startswith("mps"):
if _device_.startswith("mps") or _device_.startswith("npu"):
self.model = UnimernetModel.from_pretrained(weight_dir, attn_implementation="eager")
else:
self.model = UnimernetModel.from_pretrained(weight_dir)

View File

@@ -349,6 +349,7 @@ REPLACEMENTS_PATTERNS = {
re.compile(r'\\vline = '): r'\\models ',
re.compile(r'\\vDash '): r'\\models ',
re.compile(r'\\sq \\sqcup '): r'\\square ',
re.compile(r'\\copyright'): r'©',
}
QQUAD_PATTERN = re.compile(r'\\qquad(?!\s)')

View File

@@ -183,8 +183,8 @@ async def _one_request(
created_time: Optional[float],
):
tokenized_obj = await self._tokenize_one_request(obj)
self._send_one_request(obj, tokenized_obj, created_time)
async for out in self._wait_one_response(obj, request):
state = self._send_one_request(obj, tokenized_obj, created_time)
async for out in self._wait_one_response(obj, state, request):
yield out
@@ -256,8 +256,8 @@ async def _generate_request(
is_single = obj.is_single
if is_single:
tokenized_obj = await self._tokenize_one_request(obj)
self._send_one_request(obj, tokenized_obj, created_time)
async for response in self._wait_one_response(obj, request):
state = self._send_one_request(obj, tokenized_obj, created_time)
async for response in self._wait_one_response(obj, state, request):
yield response
else:
async for response in _handle_batch_request(self, obj, request, created_time):

View File

@@ -6,10 +6,26 @@ from sglang.srt.entrypoints.http_server import app, generate_request, launch_ser
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.server_args import prepare_server_args
from sglang.srt.utils import kill_process_tree
from sglang.srt.conversation import Conversation
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
from .logit_processor import Mineru2LogitProcessor
# mineru2.0的chat_template与chatml在换行上有微小区别
def custom_get_prompt(self) -> str:
    r"""Render the conversation into one prompt string.

    Layout produced:

    * If ``self.system_message`` is not the empty string, the prompt starts
      with ``self.system_template`` formatted with that message, followed by
      ``self.sep``.
    * Every ``(role, message)`` pair in ``self.messages`` contributes
      ``role + "\n" + message + self.sep``; when ``message`` is falsy
      (empty or ``None``) only ``role + "\n"`` is emitted, leaving the turn
      open for generation.

    Returns:
        The assembled prompt.
    """
    # Formatted up front (even if unused) so template errors surface exactly
    # as they did before.
    system_prompt = self.system_template.format(system_message=self.system_message)

    # Collect fragments and join once instead of growing a string with `+=`.
    parts = []
    if self.system_message != "":
        parts.append(system_prompt)
        parts.append(self.sep)
    for role, message in self.messages:
        parts.append(role)
        parts.append("\n")
        if message:
            parts.append(message)
            parts.append(self.sep)
    return "".join(parts)
_custom_logit_processor_str = Mineru2LogitProcessor().to_str()
# remove the existing /generate route
@@ -45,6 +61,7 @@ def main():
if server_args.chat_template is None:
server_args.chat_template = "chatml"
Conversation.get_prompt = custom_get_prompt
server_args.enable_custom_logit_processor = True

View File

@@ -86,22 +86,16 @@ def get_device():
return "cpu"
def get_table_recog_config():
    """Build the ``{"enable": ...}`` config from ``MINERU_TABLE_ENABLE``.

    Returns:
        ``{"enable": True}`` when the environment variable is unset;
        otherwise ``{"enable": value}`` where ``value`` is the variable's
        text, stripped and lower-cased, parsed as a single JSON value
        (so ``true``/``True``/``TRUE`` all yield ``True``).

    Raises:
        json.JSONDecodeError: if the variable does not hold exactly one
            valid JSON value.
    """
    table_enable = os.getenv('MINERU_TABLE_ENABLE', None)
    if table_enable is None:
        return {"enable": True}
    # Parse only the value itself rather than splicing raw text into a JSON
    # document: this tolerates capitalised booleans and prevents the variable
    # from injecting additional keys into the returned config.
    return {"enable": json.loads(table_enable.strip().lower())}
def get_formula_enable(formula_enable):
    """Resolve the formula-enable flag, letting the environment override it.

    Args:
        formula_enable: Value to use when ``MINERU_FORMULA_ENABLE`` is unset.

    Returns:
        ``formula_enable`` unchanged when the environment variable is unset;
        otherwise ``True`` only if the variable equals ``"true"``
        (case-insensitive, surrounding whitespace ignored).
    """
    formula_enable_env = os.getenv('MINERU_FORMULA_ENABLE')
    if formula_enable_env is None:
        return formula_enable
    # Strip first so values such as "true " (e.g. from an env file) are not
    # silently treated as disabled.
    return formula_enable_env.strip().lower() == 'true'
def get_formula_config():
    """Build the ``{"enable": ...}`` config from ``MINERU_FORMULA_ENABLE``.

    Returns:
        ``{"enable": True}`` when the environment variable is unset;
        otherwise ``{"enable": value}`` where ``value`` is the variable's
        text, stripped and lower-cased, parsed as a single JSON value
        (so ``true``/``True``/``TRUE`` all yield ``True``).

    Raises:
        json.JSONDecodeError: if the variable does not hold exactly one
            valid JSON value.
    """
    formula_enable = os.getenv('MINERU_FORMULA_ENABLE', None)
    if formula_enable is None:
        return {"enable": True}
    # Parse only the value itself rather than splicing raw text into a JSON
    # document: this tolerates capitalised booleans and prevents the variable
    # from injecting additional keys into the returned config.
    return {"enable": json.loads(formula_enable.strip().lower())}
def get_table_enable(table_enable):
    """Resolve the table-enable flag, letting the environment override it.

    Args:
        table_enable: Value to use when ``MINERU_TABLE_ENABLE`` is unset.

    Returns:
        ``table_enable`` unchanged when the environment variable is unset;
        otherwise ``True`` only if the variable equals ``"true"``
        (case-insensitive, surrounding whitespace ignored).
    """
    table_enable_env = os.getenv('MINERU_TABLE_ENABLE')
    if table_enable_env is None:
        return table_enable
    # Strip first so values such as "true " (e.g. from an env file) are not
    # silently treated as disabled.
    return table_enable_env.strip().lower() == 'true'
def get_latex_delimiter_config():

View File

@@ -33,9 +33,11 @@ class CategoryId:
TableCaption = 6
TableFootnote = 7
InterlineEquation_Layout = 8
InterlineEquationNumber_Layout = 9
InlineEquation = 13
InterlineEquation_YOLO = 14
OcrText = 15
LowScoreText = 16
ImageFootnote = 101

View File

@@ -1 +1 @@
__version__ = "2.0.0"
__version__ = "2.0.3"

View File

@@ -43,7 +43,7 @@ vlm = [
"pydantic",
]
sglang = [
"sglang[all]>=0.4.7",
"sglang[all]==0.4.7",
]
pipeline = [
"matplotlib>=3.10,<4",

View File

@@ -263,6 +263,54 @@
"created_at": "2025-04-30T09:25:31Z",
"repoId": 765083837,
"pullRequestNo": 2411
},
{
"name": "seedclaimer",
"id": 86753366,
"comment_id": 2916194375,
"created_at": "2025-05-28T12:50:25Z",
"repoId": 765083837,
"pullRequestNo": 2536
},
{
"name": "liuzhenghua",
"id": 11787325,
"comment_id": 2921092605,
"created_at": "2025-05-30T02:57:07Z",
"repoId": 765083837,
"pullRequestNo": 2550
},
{
"name": "PairZhu",
"id": 47098840,
"comment_id": 2938149702,
"created_at": "2025-06-04T02:39:39Z",
"repoId": 765083837,
"pullRequestNo": 2566
},
{
"name": "AdrianWangs",
"id": 72337244,
"comment_id": 2943818300,
"created_at": "2025-06-05T11:30:42Z",
"repoId": 765083837,
"pullRequestNo": 2578
},
{
"name": "YanzhenHuang",
"id": 86364920,
"comment_id": 2968974232,
"created_at": "2025-06-13T04:17:08Z",
"repoId": 765083837,
"pullRequestNo": 2620
},
{
"name": "Ar-Hyk",
"id": 55748412,
"comment_id": 2970512136,
"created_at": "2025-06-13T14:02:16Z",
"repoId": 765083837,
"pullRequestNo": 2634
}
]
}