mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 19:18:34 +07:00
Compare commits
31 Commits
release-2.
...
release-2.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
13c23c475d | ||
|
|
039cf27fd5 | ||
|
|
72c946a9ec | ||
|
|
1bec25b1e1 | ||
|
|
2785f60424 | ||
|
|
3cdcd76c34 | ||
|
|
252255166f | ||
|
|
8deb3b5253 | ||
|
|
f34644eb61 | ||
|
|
5b88eba7a1 | ||
|
|
58b8e8a912 | ||
|
|
7960d8b799 | ||
|
|
20dcbd2164 | ||
|
|
e4e58fa2de | ||
|
|
013ddc02b2 | ||
|
|
390ddd8b96 | ||
|
|
a1e377be05 | ||
|
|
b7d7a1bf99 | ||
|
|
d71c7f3a7d | ||
|
|
2abcc43493 | ||
|
|
f07ac1c8a2 | ||
|
|
af6a2166fe | ||
|
|
0f7d960885 | ||
|
|
9f0008acff | ||
|
|
ccd2a71fbb | ||
|
|
e1181ba814 | ||
|
|
6717712e91 | ||
|
|
a49c605f12 | ||
|
|
740c5f6f5c | ||
|
|
8230995a50 | ||
|
|
8cf86e3818 |
18
README.md
18
README.md
@@ -51,9 +51,13 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
|
||||
</div>
|
||||
|
||||
# Changelog
|
||||
- 2025/06/15 2.0.2 released
|
||||
- 2025/06/17 2.0.4 Released
|
||||
- Fixed the issue where models were still required to be downloaded in the `sglang-client` mode
|
||||
- Fixed the issue where only the first instance would take effect when attempting to launch multiple `sglang-client` instances via multiple URLs within the same process
|
||||
- 2025/06/15 2.0.3 Released
|
||||
- Fixed a configuration file key-value update error that occurred when the model download type was set to `all`
|
||||
- Fixed an issue where formula and table feature toggle parameters in the command line interface could not be effectively disabled
|
||||
- Fixed the issue where the formula and table feature toggle switches were not working in `command line mode`, causing the features to remain enabled.
|
||||
- Fixed compatibility issues with sglang version 0.4.7 in the `sglang-engine` mode.
|
||||
- Updated Dockerfile and installation documentation for deploying the full version of MinerU in sglang environment
|
||||
- 2025/06/13 2.0.0 Released
|
||||
- MinerU 2.0 represents a comprehensive reconstruction and upgrade from architecture to functionality, delivering a more streamlined design, enhanced performance, and more flexible user experience.
|
||||
@@ -366,7 +370,7 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
|
||||
<li><a href="#quick-start">Quick Start</a>
|
||||
<ul>
|
||||
<li><a href="#online-demo">Online Demo</a></li>
|
||||
<li><a href="#quick-cpu-demo">Local Deployment</a></li>
|
||||
<li><a href="#local-deployment">Local Deployment</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
@@ -532,6 +536,14 @@ If you need to use **sglang to accelerate VLM model inference**, you can choose
|
||||
> [!TIP]
|
||||
> The Dockerfile uses `lmsysorg/sglang:v0.4.7-cu124` as the default base image. If necessary, you can modify it to another platform version.
|
||||
|
||||
|
||||
#### 1.4 Install client (for connecting to sglang-server on edge devices that require only CPU and network connectivity)
|
||||
|
||||
```bash
|
||||
uv pip install -U mineru
|
||||
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Using MinerU
|
||||
|
||||
@@ -50,9 +50,13 @@
|
||||
</div>
|
||||
|
||||
# 更新记录
|
||||
- 2025/06/15 2.0.2发布
|
||||
- 2025/06/17 2.0.4发布
|
||||
- 修复了`sglang-client`模式下依然需要下载模型的问题
|
||||
- 修复了同一进程内尝试通过多个url启动多个`sglang-client`实例时,只有第一个生效的问题
|
||||
- 2025/06/15 2.0.3发布
|
||||
- 修复了当下载模型类型设置为`all`时,配置文件出现键值更新错误的问题
|
||||
- 修复了命令行接口中公式和表格功能的开关参数实际无法关闭的问题
|
||||
- 修复了命令行模式下公式和表格功能开关不生效导致功能无法关闭的问题
|
||||
- 修复了`sglang-engine`模式下,0.4.7版本sglang的兼容性问题
|
||||
- 更新了sglang环境下部署完整版MinerU的Dockerfile和相关安装文档
|
||||
- 2025/06/13 2.0.0发布
|
||||
- MinerU 2.0 是一次从架构到功能的全面重构与升级,带来了更简洁的设计、更强的性能以及更灵活的使用体验。
|
||||
@@ -521,6 +525,13 @@ uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
|
||||
|
||||
> [!TIP]
|
||||
> Dockerfile默认使用`lmsysorg/sglang:v0.4.7-cu124`作为基础镜像,如有需要,您可以自行修改为其他平台版本。
|
||||
|
||||
#### 1.4 安装client(用于在仅需 CPU 和网络连接的边缘设备上连接 sglang-server)
|
||||
|
||||
```bash
|
||||
uv pip install -U mineru -i https://mirrors.aliyun.com/pypi/simple
|
||||
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -240,4 +240,4 @@ if __name__ == '__main__':
|
||||
"""To enable VLM mode, change the backend to 'vlm-xxx'"""
|
||||
# parse_doc(doc_path_list, output_dir, backend="vlm-transformers") # more general.
|
||||
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-engine") # faster(engine).
|
||||
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-client", server_url="http://127.0.0.1:30000") # faster(client).
|
||||
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-client", server_url="http://127.0.0.1:30000") # faster(client).
|
||||
@@ -76,7 +76,11 @@ def doc_analyze(
|
||||
formula_enable=True,
|
||||
table_enable=True,
|
||||
):
|
||||
MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
|
||||
"""
|
||||
适当调大MIN_BATCH_INFERENCE_SIZE可以提高性能,可能会增加显存使用量,
|
||||
可通过环境变量MINERU_MIN_BATCH_INFERENCE_SIZE设置,默认值为100。
|
||||
"""
|
||||
min_batch_inference_size = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
|
||||
|
||||
# 收集所有页面信息
|
||||
all_pages_info = [] # 存储(dataset_index, page_index, img, ocr, lang, width, height)
|
||||
@@ -109,7 +113,7 @@ def doc_analyze(
|
||||
|
||||
# 准备批处理
|
||||
images_with_extra_info = [(info[2], info[3], info[4]) for info in all_pages_info]
|
||||
batch_size = MIN_BATCH_INFERENCE_SIZE
|
||||
batch_size = min_batch_inference_size
|
||||
batch_images = [
|
||||
images_with_extra_info[i:i + batch_size]
|
||||
for i in range(0, len(images_with_extra_info), batch_size)
|
||||
|
||||
@@ -27,9 +27,9 @@ class ModelSingleton:
|
||||
model_path: str | None,
|
||||
server_url: str | None,
|
||||
) -> BasePredictor:
|
||||
key = (backend,)
|
||||
key = (backend, model_path, server_url)
|
||||
if key not in self._models:
|
||||
if not model_path:
|
||||
if backend in ['transformers', 'sglang-engine'] and not model_path:
|
||||
model_path = auto_download_and_get_model_root_path("/","vlm")
|
||||
self._models[key] = get_predictor(
|
||||
backend=backend,
|
||||
|
||||
@@ -21,7 +21,7 @@ class MathDataset(Dataset):
|
||||
class UnimernetModel(object):
|
||||
def __init__(self, weight_dir, _device_="cpu"):
|
||||
from .unimernet_hf import UnimernetModel
|
||||
if _device_.startswith("mps"):
|
||||
if _device_.startswith("mps") or _device_.startswith("npu"):
|
||||
self.model = UnimernetModel.from_pretrained(weight_dir, attn_implementation="eager")
|
||||
else:
|
||||
self.model = UnimernetModel.from_pretrained(weight_dir)
|
||||
|
||||
@@ -183,8 +183,8 @@ async def _one_request(
|
||||
created_time: Optional[float],
|
||||
):
|
||||
tokenized_obj = await self._tokenize_one_request(obj)
|
||||
self._send_one_request(obj, tokenized_obj, created_time)
|
||||
async for out in self._wait_one_response(obj, request):
|
||||
state = self._send_one_request(obj, tokenized_obj, created_time)
|
||||
async for out in self._wait_one_response(obj, state, request):
|
||||
yield out
|
||||
|
||||
|
||||
@@ -256,8 +256,8 @@ async def _generate_request(
|
||||
is_single = obj.is_single
|
||||
if is_single:
|
||||
tokenized_obj = await self._tokenize_one_request(obj)
|
||||
self._send_one_request(obj, tokenized_obj, created_time)
|
||||
async for response in self._wait_one_response(obj, request):
|
||||
state = self._send_one_request(obj, tokenized_obj, created_time)
|
||||
async for response in self._wait_one_response(obj, state, request):
|
||||
yield response
|
||||
else:
|
||||
async for response in _handle_batch_request(self, obj, request, created_time):
|
||||
|
||||
@@ -6,10 +6,26 @@ from sglang.srt.entrypoints.http_server import app, generate_request, launch_ser
|
||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||
from sglang.srt.server_args import prepare_server_args
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.srt.conversation import Conversation
|
||||
|
||||
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
||||
from .logit_processor import Mineru2LogitProcessor
|
||||
|
||||
# mineru2.0的chat_template与chatml在换行上有微小区别
|
||||
def custom_get_prompt(self) -> str:
|
||||
system_prompt = self.system_template.format(system_message=self.system_message)
|
||||
if self.system_message == "":
|
||||
ret = ""
|
||||
else:
|
||||
ret = system_prompt + self.sep
|
||||
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + "\n" + message + self.sep
|
||||
else:
|
||||
ret += role + "\n"
|
||||
return ret
|
||||
|
||||
_custom_logit_processor_str = Mineru2LogitProcessor().to_str()
|
||||
|
||||
# remove the existing /generate route
|
||||
@@ -45,6 +61,7 @@ def main():
|
||||
|
||||
if server_args.chat_template is None:
|
||||
server_args.chat_template = "chatml"
|
||||
Conversation.get_prompt = custom_get_prompt
|
||||
|
||||
server_args.enable_custom_logit_processor = True
|
||||
|
||||
|
||||
@@ -33,9 +33,11 @@ class CategoryId:
|
||||
TableCaption = 6
|
||||
TableFootnote = 7
|
||||
InterlineEquation_Layout = 8
|
||||
InterlineEquationNumber_Layout = 9
|
||||
InlineEquation = 13
|
||||
InterlineEquation_YOLO = 14
|
||||
OcrText = 15
|
||||
LowScoreText = 16
|
||||
ImageFootnote = 101
|
||||
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "2.0.1"
|
||||
__version__ = "2.0.3"
|
||||
|
||||
@@ -43,7 +43,7 @@ vlm = [
|
||||
"pydantic",
|
||||
]
|
||||
sglang = [
|
||||
"sglang[all]>=0.4.7",
|
||||
"sglang[all]==0.4.7",
|
||||
]
|
||||
pipeline = [
|
||||
"matplotlib>=3.10,<4",
|
||||
|
||||
Reference in New Issue
Block a user