mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 19:18:34 +07:00
Compare commits
16 Commits
release-2.
...
release-2.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
304a6d9d8c | ||
|
|
bce9bb6d1d | ||
|
|
920220e48e | ||
|
|
9fc3d6c742 | ||
|
|
8fd544273e | ||
|
|
72f1f5f935 | ||
|
|
5559a4701a | ||
|
|
437022abfa | ||
|
|
4653ed1502 | ||
|
|
b58c7f8d6e | ||
|
|
f6133b1731 | ||
|
|
12d72c7c17 | ||
|
|
5f3f35c009 | ||
|
|
16ad71446b | ||
|
|
d4b364eb9f | ||
|
|
5db08afef6 |
@@ -44,7 +44,7 @@
|
||||
|
||||
# Changelog
|
||||
|
||||
- 2025/09/19 2.5.0 Released
|
||||
- 2025/09/19 2.5.1 Released
|
||||
|
||||
We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing.
|
||||
With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3.
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
# 更新记录
|
||||
|
||||
- 2025/09/19 2.5.0 发布
|
||||
- 2025/09/19 2.5.1 发布
|
||||
我们正式发布 MinerU2.5,当前最强文档解析多模态大模型。仅凭 1.2B 参数,MinerU2.5 在 OmniDocBench 文档解析评测中,精度已全面超越 Gemini2.5-Pro、GPT-4o、Qwen2.5-VL-72B等顶级多模态大模型,并显著领先于主流文档解析专用模型(如 dots.ocr, MonkeyOCR, PP-StructureV3 等)。
|
||||
模型已发布至[HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B)和[ModelScope](https://modelscope.cn/models/OpenDataLab/MinerU2.5-2509-1.2B)平台,欢迎大家下载使用!
|
||||
- 核心亮点
|
||||
|
||||
@@ -14,6 +14,7 @@ from ...utils.model_utils import get_vram
|
||||
from ...utils.models_download_utils import auto_download_and_get_model_root_path
|
||||
|
||||
from mineru_vl_utils import MinerUClient
|
||||
from packaging import version
|
||||
|
||||
|
||||
class ModelSingleton:
|
||||
@@ -52,7 +53,6 @@ class ModelSingleton:
|
||||
except ImportError:
|
||||
raise ImportError("Please install transformers to use the transformers backend.")
|
||||
|
||||
from packaging import version
|
||||
if version.parse(transformers_version) >= version.parse("4.56.0"):
|
||||
dtype_key = "dtype"
|
||||
else:
|
||||
@@ -88,24 +88,32 @@ class ModelSingleton:
|
||||
elif backend == "vllm-engine":
|
||||
try:
|
||||
import vllm
|
||||
vllm_version = vllm.__version__
|
||||
from mineru_vl_utils import MinerULogitsProcessor
|
||||
except ImportError:
|
||||
raise ImportError("Please install vllm to use the vllm-engine backend.")
|
||||
if "gpu_memory_utilization" not in kwargs:
|
||||
kwargs["gpu_memory_utilization"] = 0.5
|
||||
if "model" not in kwargs:
|
||||
kwargs["model"] = model_path
|
||||
if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
|
||||
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
||||
# 使用kwargs为 vllm初始化参数
|
||||
vllm_llm = vllm.LLM(**kwargs)
|
||||
elif backend == "vllm-async-engine":
|
||||
try:
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.v1.engine.async_llm import AsyncLLM
|
||||
from vllm import __version__ as vllm_version
|
||||
from mineru_vl_utils import MinerULogitsProcessor
|
||||
except ImportError:
|
||||
raise ImportError("Please install vllm to use the vllm-async-engine backend.")
|
||||
if "gpu_memory_utilization" not in kwargs:
|
||||
kwargs["gpu_memory_utilization"] = 0.5
|
||||
if "model" not in kwargs:
|
||||
kwargs["model"] = model_path
|
||||
if version.parse(vllm_version) >= version.parse("0.10.1") and "logits_processors" not in kwargs:
|
||||
kwargs["logits_processors"] = [MinerULogitsProcessor]
|
||||
# 使用kwargs为 vllm初始化参数
|
||||
vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
|
||||
self._models[key] = MinerUClient(
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import sys
|
||||
|
||||
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
||||
|
||||
from vllm.entrypoints.cli.main import main as vllm_main
|
||||
from vllm import __version__ as vllm_version
|
||||
from packaging import version
|
||||
|
||||
|
||||
def main():
|
||||
@@ -9,6 +12,7 @@ def main():
|
||||
|
||||
has_port_arg = False
|
||||
has_gpu_memory_utilization_arg = False
|
||||
has_logits_processors_arg = False
|
||||
model_path = None
|
||||
model_arg_indices = []
|
||||
|
||||
@@ -18,6 +22,8 @@ def main():
|
||||
has_port_arg = True
|
||||
if arg == "--gpu-memory-utilization" or arg.startswith("--gpu-memory-utilization="):
|
||||
has_gpu_memory_utilization_arg = True
|
||||
if arg == "--logits-processors" or arg.startswith("--logits-processors="):
|
||||
has_logits_processors_arg = True
|
||||
if arg == "--model":
|
||||
if i + 1 < len(args):
|
||||
model_path = args[i + 1]
|
||||
@@ -38,6 +44,8 @@ def main():
|
||||
args.extend(["--gpu-memory-utilization", "0.5"])
|
||||
if not model_path:
|
||||
model_path = auto_download_and_get_model_root_path("/", "vlm")
|
||||
if not has_logits_processors_arg and version.parse(vllm_version) >= version.parse("0.10.1"):
|
||||
args.extend(["--logits-processors", "mineru_vl_utils:MinerULogitsProcessor"])
|
||||
|
||||
# 重构参数,将模型路径作为位置参数
|
||||
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "2.2.2"
|
||||
__version__ = "2.5.0"
|
||||
|
||||
@@ -39,7 +39,7 @@ dependencies = [
|
||||
"openai>=1.70.0,<2",
|
||||
"beautifulsoup4>=4.13.5,<5",
|
||||
"magika>=0.6.2,<0.7.0",
|
||||
"mineru_vl_utils>=0.1.6,<1.0.0",
|
||||
"mineru-vl-utils>=0.1.7,<1",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
Reference in New Issue
Block a user