diff --git a/README.md b/README.md index 99b51cac..867dc3ab 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,10 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte # Changelog +- 2025/06/15 2.0.2 Released + - Fixed a configuration file key-value update error that occurred when the model download type was set to `all` + - Fixed an issue where the formula and table feature toggle parameters in the command line interface could not actually be disabled + - Updated the Dockerfile and installation documentation for deploying the full version of MinerU in an sglang environment - 2025/06/13 2.0.0 Released - MinerU 2.0 represents a comprehensive reconstruction and upgrade from architecture to functionality, delivering a more streamlined design, enhanced performance, and more flexible user experience. - **New Architecture**: MinerU 2.0 has been deeply restructured in code organization and interaction methods, significantly improving system usability, maintainability, and extensibility. 
@@ -482,7 +486,7 @@ There are three different ways to experience MinerU: ```bash pip install --upgrade pip pip install uv -uv pip install "mineru[core]>=2.0.0" +uv pip install -U "mineru[core]" ``` #### 1.2 Install from source @@ -493,19 +497,40 @@ cd MinerU uv pip install -e .[core] ``` -#### 1.3 Install full version (with sglang acceleration) +#### 1.3 Install the Full Version (Supports sglang Acceleration) -To use **sglang acceleration for VLM model inference**, install the full version: +If you need to use **sglang to accelerate VLM model inference**, you can choose any of the following methods to install the full version: -```bash -uv pip install "mineru[all]>=2.0.0" -``` - -Or install from source: - -```bash -uv pip install -e .[all] -``` +- Install using uv or pip: + ```bash + uv pip install -U "mineru[all]" + ``` +- Install from source: + ```bash + uv pip install -e .[all] + ``` +- Build image using Dockerfile: + ```bash + wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/global/Dockerfile + docker build -t mineru-sglang:latest -f Dockerfile . + ``` + Start Docker container: + ```bash + docker run --gpus all \ + --shm-size 32g \ + -p 30000:30000 \ + --ipc=host \ + mineru-sglang:latest \ + mineru-sglang-server --host 0.0.0.0 --port 30000 + ``` + Or start using Docker Compose: + ```bash + wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/compose.yaml + docker compose -f compose.yaml up -d + ``` + +> [!TIP] +> The Dockerfile uses `lmsysorg/sglang:v0.4.7-cu124` as the default base image. If necessary, you can modify it to another platform version. 
--- @@ -629,7 +654,8 @@ mineru-sglang-server --port 30000 mineru -p -o -b vlm-sglang-client -u http://127.0.0.1:30000 ``` -> 💡 For more information about output files, please refer to [Output File Documentation](docs/output_file_en_us.md) +> [!TIP] +> For more information about output files, please refer to [Output File Documentation](docs/output_file_en_us.md) --- diff --git a/README_zh-CN.md b/README_zh-CN.md index 8ca56575..e27b2b4d 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -50,6 +50,10 @@ # 更新记录 +- 2025/06/15 2.0.2发布 + - 修复了当下载模型类型设置为`all`时,配置文件出现键值更新错误的问题 + - 修复了命令行接口中公式和表格功能的开关参数实际无法关闭的问题 + - 更新了sglang环境下部署完整版MinerU的Dockerfile和相关安装文档 - 2025/06/13 2.0.0发布 - MinerU 2.0 是一次从架构到功能的全面重构与升级,带来了更简洁的设计、更强的性能以及更灵活的使用体验。 - **全新架构**:MinerU 2.0 在代码结构和交互方式上进行了深度重构,显著提升了系统的易用性、可维护性与扩展能力。 @@ -472,7 +476,7 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c ```bash pip install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple pip install uv -i https://mirrors.aliyun.com/pypi/simple -uv pip install "mineru[core]>=2.0.0" -i https://mirrors.aliyun.com/pypi/simple +uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple ``` #### 1.2 源码安装 @@ -485,17 +489,38 @@ uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple #### 1.3 安装完整版(支持 sglang 加速) -如需使用 **sglang 加速 VLM 模型推理**,请安装完整版本: +如需使用 **sglang 加速 VLM 模型推理**,请选择合适的方式安装完整版本: -```bash -uv pip install "mineru[all]>=2.0.0" -i https://mirrors.aliyun.com/pypi/simple -``` - -或从源码安装: - -```bash -uv pip install -e .[all] -i https://mirrors.aliyun.com/pypi/simple -``` +- 使用uv或pip安装 + ```bash + uv pip install -U "mineru[all]" -i https://mirrors.aliyun.com/pypi/simple + ``` +- 从源码安装: + ```bash + uv pip install -e .[all] -i https://mirrors.aliyun.com/pypi/simple + ``` +- 使用 Dockerfile 构建镜像: + ```bash + wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/Dockerfile + docker build -t mineru-sglang:latest -f Dockerfile . 
+ ``` + 启动 Docker 容器: + ```bash + docker run --gpus all \ + --shm-size 32g \ + -p 30000:30000 \ + --ipc=host \ + mineru-sglang:latest \ + mineru-sglang-server --host 0.0.0.0 --port 30000 + ``` + 或使用 Docker Compose 启动: + ```bash + wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/compose.yaml + docker compose -f compose.yaml up -d + ``` + +> [!TIP] +> Dockerfile默认使用`lmsysorg/sglang:v0.4.7-cu124`作为基础镜像,如有需要,您可以自行修改为其他平台版本。 --- @@ -619,7 +644,8 @@ mineru-sglang-server --port 30000 mineru -p -o -b vlm-sglang-client -u http://127.0.0.1:30000 ``` -> 💡 更多关于输出文件的信息,请参考 [输出文件说明](docs/output_file_zh_cn.md) +> [!TIP] +> 更多关于输出文件的信息,请参考 [输出文件说明](docs/output_file_zh_cn.md) --- diff --git a/docker/china/Dockerfile b/docker/china/Dockerfile index 029917b2..6e8055e0 100644 --- a/docker/china/Dockerfile +++ b/docker/china/Dockerfile @@ -1,33 +1,8 @@ -# Use the official Ubuntu base image -FROM ubuntu:22.04 - -# Set environment variables to non-interactive to avoid prompts during installation -ENV DEBIAN_FRONTEND=noninteractive - -# Update the package list and install necessary packages -RUN apt-get update && \ - apt-get install -y \ - software-properties-common && \ - add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && \ - apt-get install -y \ - python3.10 \ - python3.10-venv \ - python3.10-distutils \ - python3-pip \ - wget \ - git \ - libgl1 \ - libglib2.0-0 \ - && rm -rf /var/lib/apt/lists/* - -# Set Python 3.10 as the default python3 -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 +# Use the official sglang image +FROM lmsysorg/sglang:v0.4.7-cu124 # install mineru latest -RUN /bin/bash -c "pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \ - pip3 install uv -i https://mirrors.aliyun.com/pypi/simple && \ - uv pip install 'mineru[all]>=2.0.0' -i https://mirrors.aliyun.com/pypi/simple" +RUN python3 -m pip install -U 'mineru[core]' -i https://mirrors.aliyun.com/pypi/simple --break-system-packages # 
Download models and update the configuration file RUN /bin/bash -c "mineru-models-download -s modelscope -m all" diff --git a/docker/compose.yaml b/docker/compose.yaml new file mode 100644 index 00000000..afc2dc15 --- /dev/null +++ b/docker/compose.yaml @@ -0,0 +1,26 @@ +services: + mineru-sglang: + image: mineru-sglang:latest + container_name: mineru-sglang + restart: always + ports: + - 30000:30000 + environment: + MINERU_MODEL_SOURCE: local + entrypoint: mineru-sglang-server + command: + --host 0.0.0.0 + --port 30000 + ulimits: + memlock: -1 + stack: 67108864 + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"] + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] + capabilities: [gpu] \ No newline at end of file diff --git a/docker/global/Dockerfile b/docker/global/Dockerfile index 7b0c8610..5de822d4 100644 --- a/docker/global/Dockerfile +++ b/docker/global/Dockerfile @@ -1,33 +1,8 @@ -# Use the official Ubuntu base image -FROM ubuntu:22.04 - -# Set environment variables to non-interactive to avoid prompts during installation -ENV DEBIAN_FRONTEND=noninteractive - -# Update the package list and install necessary packages -RUN apt-get update && \ - apt-get install -y \ - software-properties-common && \ - add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && \ - apt-get install -y \ - python3.10 \ - python3.10-venv \ - python3.10-distutils \ - python3-pip \ - wget \ - git \ - libgl1 \ - libglib2.0-0 \ - && rm -rf /var/lib/apt/lists/* - -# Set Python 3.10 as the default python3 -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 +# Use the official sglang image +FROM lmsysorg/sglang:v0.4.7-cu124 # install mineru latest -RUN /bin/bash -c "pip3 install --upgrade pip && \ - pip3 install uv && \ - uv pip install 'mineru[all]>=2.0.0'" +RUN python3 -m pip install -U 'mineru[core]' --break-system-packages # Download models and update the configuration 
file RUN /bin/bash -c "mineru-models-download -s huggingface -m all" diff --git a/mineru/backend/pipeline/batch_analyze.py b/mineru/backend/pipeline/batch_analyze.py index 97d98155..954f3b9f 100644 --- a/mineru/backend/pipeline/batch_analyze.py +++ b/mineru/backend/pipeline/batch_analyze.py @@ -5,6 +5,7 @@ from collections import defaultdict import numpy as np from .model_init import AtomModelSingleton +from ...utils.config_reader import get_formula_enable, get_table_enable from ...utils.model_utils import crop_img, get_res_list_from_layout_res from ...utils.ocr_utils import get_adjusted_mfdetrec_res, get_ocr_result_list, OcrConfidence @@ -16,8 +17,8 @@ MFR_BASE_BATCH_SIZE = 16 class BatchAnalyze: def __init__(self, model_manager, batch_ratio: int, formula_enable, table_enable, enable_ocr_det_batch: bool = True): self.batch_ratio = batch_ratio - self.formula_enable = formula_enable - self.table_enable = table_enable + self.formula_enable = get_formula_enable(formula_enable) + self.table_enable = get_table_enable(table_enable) self.model_manager = model_manager self.enable_ocr_det_batch = enable_ocr_det_batch diff --git a/mineru/backend/pipeline/model_json_to_middle_json.py b/mineru/backend/pipeline/model_json_to_middle_json.py index 668574b5..822899e0 100644 --- a/mineru/backend/pipeline/model_json_to_middle_json.py +++ b/mineru/backend/pipeline/model_json_to_middle_json.py @@ -4,7 +4,7 @@ import time from loguru import logger from tqdm import tqdm -from mineru.utils.config_reader import get_device, get_llm_aided_config +from mineru.utils.config_reader import get_device, get_llm_aided_config, get_formula_enable from mineru.backend.pipeline.model_init import AtomModelSingleton from mineru.backend.pipeline.para_split import para_split from mineru.utils.block_pre_proc import prepare_block_bboxes, process_groups @@ -165,6 +165,7 @@ def page_model_info_to_page_info(page_model_info, image_dict, page, image_writer def result_to_middle_json(model_list, images_list, 
pdf_doc, image_writer, lang=None, ocr_enable=False, formula_enabled=True): middle_json = {"pdf_info": [], "_backend":"pipeline", "_version_name": __version__} + formula_enabled = get_formula_enable(formula_enabled) for page_index, page_model_info in tqdm(enumerate(model_list), total=len(model_list), desc="Processing pages"): page = pdf_doc[page_index] image_dict = images_list[page_index] diff --git a/mineru/backend/pipeline/pipeline_analyze.py b/mineru/backend/pipeline/pipeline_analyze.py index 3df9d4a5..af01f433 100644 --- a/mineru/backend/pipeline/pipeline_analyze.py +++ b/mineru/backend/pipeline/pipeline_analyze.py @@ -5,7 +5,7 @@ import PIL.Image import torch from .model_init import MineruPipelineModel -from mineru.utils.config_reader import get_device, get_formula_config, get_table_recog_config +from mineru.utils.config_reader import get_device from ...utils.pdf_classify import classify from ...utils.pdf_image_tools import load_images_from_pdf @@ -44,20 +44,15 @@ class ModelSingleton: def custom_model_init( lang=None, - formula_enable=None, - table_enable=None, + formula_enable=True, + table_enable=True, ): model_init_start = time.time() # 从配置文件读取model-dir和device device = get_device() - formula_config = get_formula_config() - if formula_enable is not None: - formula_config['enable'] = formula_enable - - table_config = get_table_recog_config() - if table_enable is not None: - table_config['enable'] = table_enable + formula_config = {"enable": formula_enable} + table_config = {"enable": table_enable} model_input = { 'device': device, @@ -78,8 +73,8 @@ def doc_analyze( pdf_bytes_list, lang_list, parse_method: str = 'auto', - formula_enable=None, - table_enable=None, + formula_enable=True, + table_enable=True, ): MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100)) @@ -152,8 +147,8 @@ def doc_analyze( def batch_image_analyze( images_with_extra_info: List[Tuple[PIL.Image.Image, bool, str]], - formula_enable=None, - 
table_enable=None): + formula_enable=True, + table_enable=True): # os.environ['CUDA_VISIBLE_DEVICES'] = str(idx) from .batch_analyze import BatchAnalyze diff --git a/mineru/cli/client.py b/mineru/cli/client.py index 41818224..99121916 100644 --- a/mineru/cli/client.py +++ b/mineru/cli/client.py @@ -2,7 +2,6 @@ import os import click from pathlib import Path -import torch from loguru import logger from mineru.utils.config_reader import get_device @@ -140,10 +139,6 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes def main(input_path, output_dir, method, backend, lang, server_url, start_page_id, end_page_id, formula_enable, table_enable, device_mode, virtual_vram, model_source): - if os.getenv('MINERU_FORMULA_ENABLE', None) is None: - os.environ['MINERU_FORMULA_ENABLE'] = str(formula_enable).lower() - if os.getenv('MINERU_TABLE_ENABLE', None) is None: - os.environ['MINERU_TABLE_ENABLE'] = str(table_enable).lower() def get_device_mode() -> str: if device_mode is not None: return device_mode @@ -184,6 +179,8 @@ def main(input_path, output_dir, method, backend, lang, server_url, start_page_i p_lang_list=lang_list, backend=backend, parse_method=method, + p_formula_enable=formula_enable, + p_table_enable=table_enable, server_url=server_url, start_page_id=start_page_id, end_page_id=end_page_id diff --git a/mineru/cli/common.py b/mineru/cli/common.py index 74fab6c0..9c9383d5 100644 --- a/mineru/cli/common.py +++ b/mineru/cli/common.py @@ -115,6 +115,7 @@ def do_parse( pdf_doc = all_pdf_docs[idx] _lang = lang_list[idx] _ocr_enable = ocr_enabled_list[idx] + middle_json = pipeline_result_to_middle_json(model_list, images_list, pdf_doc, image_writer, _lang, _ocr_enable, p_formula_enable) pdf_info = middle_json["pdf_info"] diff --git a/mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py b/mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py index 2c451c7a..d08b6093 100644 --- a/mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py 
+++ b/mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py @@ -349,6 +349,7 @@ REPLACEMENTS_PATTERNS = { re.compile(r'\\vline = '): r'\\models ', re.compile(r'\\vDash '): r'\\models ', re.compile(r'\\sq \\sqcup '): r'\\square ', + re.compile(r'\\copyright'): r'©', } QQUAD_PATTERN = re.compile(r'\\qquad(?!\s)') diff --git a/mineru/utils/config_reader.py b/mineru/utils/config_reader.py index 54081202..c31ed8fb 100644 --- a/mineru/utils/config_reader.py +++ b/mineru/utils/config_reader.py @@ -86,22 +86,16 @@ def get_device(): return "cpu" -def get_table_recog_config(): - table_enable = os.getenv('MINERU_TABLE_ENABLE', None) - if table_enable is not None: - return json.loads(f'{{"enable": {table_enable}}}') - else: - # logger.warning(f"not found 'MINERU_TABLE_ENABLE' in environment variable, use 'true' as default.") - return json.loads(f'{{"enable": true}}') +def get_formula_enable(formula_enable): + formula_enable_env = os.getenv('MINERU_FORMULA_ENABLE') + formula_enable = formula_enable if formula_enable_env is None else formula_enable_env.lower() == 'true' + return formula_enable -def get_formula_config(): - formula_enable = os.getenv('MINERU_FORMULA_ENABLE', None) - if formula_enable is not None: - return json.loads(f'{{"enable": {formula_enable}}}') - else: - # logger.warning(f"not found 'MINERU_FORMULA_ENABLE' in environment variable, use 'true' as default.") - return json.loads(f'{{"enable": true}}') +def get_table_enable(table_enable): + table_enable_env = os.getenv('MINERU_TABLE_ENABLE') + table_enable = table_enable if table_enable_env is None else table_enable_env.lower() == 'true' + return table_enable def get_latex_delimiter_config():