# Mirror of https://github.com/opendatalab/MinerU.git
# Synced 2026-03-27 11:08:32 +07:00
# 173 lines, 3.9 KiB, TOML
# Build requirements and backend: the package is built with setuptools.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

# Core project metadata and the dependencies installed with every variant.
[project]
name = "mineru"
# The version is not written here; it is read from the attribute named in
# [tool.setuptools.dynamic].
dynamic = ["version"]
license = { text = "AGPL-3.0" }
description = "A practical tool for converting PDF to Markdown"
readme = "README.md"
requires-python = ">=3.10,<3.14"
keywords = ["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"]
# Keep these classifiers in step with `requires-python` above.
classifiers = [
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
dependencies = [
    "boto3>=1.28.43",
    "click>=8.1.7",
    "loguru>=0.7.2",
    "numpy>=1.21.6",
    # Pinned to one exact release.
    "pdfminer.six==20250506",
    "tqdm>=4.67.1",
    "requests",
    "httpx",
    "pillow>=11.0.0",
    "pypdfium2>=4.30.0",
    "pypdf>=5.6.0",
    "reportlab",
    "pdftext>=0.6.2",
    "modelscope>=1.26.0",
    "huggingface-hub>=0.32.4",
    "json-repair>=0.46.2",
    "opencv-python>=4.11.0.86",
    # Restricted to the 0.2.x series.
    "fast-langdetect>=0.2.3,<0.3.0",
    "scikit-image",
]

# Optional extras, installed with `pip install "mineru[<extra>]"`.
# Some extras reference other extras of this same package ("mineru[...]").
[project.optional-dependencies]
# Test tooling on top of everything in the `core` extra.
test = [
    "mineru[core]",
    "pytest",
    "pytest-cov",
    "coverage",
    "beautifulsoup4",
    "fuzzywuzzy",
]
# Note: `torch` and `transformers` are also constrained by `pipeline`; when
# both extras are installed together the resolver must satisfy both ranges.
vlm = [
    "transformers>=4.51.1",
    "torch>=2.6.0",
    "accelerate>=1.5.1",
    "pydantic",
]
sglang = [
    "sglang[all]>=0.4.7,<0.4.10",
]
# Most entries are capped below their next major version.
pipeline = [
    "matplotlib>=3.10,<4",
    "ultralytics>=8.3.48,<9",
    "doclayout_yolo==0.0.4",
    "dill>=0.3.8,<1",
    "rapid_table>=1.0.5,<2.0.0",
    "PyYAML>=6.0.2,<7",
    "ftfy>=6.3.1,<7",
    "openai>=1.70.0,<2",
    "shapely>=2.0.7,<3",
    "pyclipper>=1.3.0,<2",
    "omegaconf>=2.3.0,<3",
    "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
    "torchvision",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
]
api = [
    "fastapi",
    "python-multipart",
    "uvicorn",
]
gradio = [
    "gradio>=5.34,<6",
    "gradio-pdf>=0.0.22",
]
# Aggregate extra: vlm + pipeline + api + gradio.
core = [
    "mineru[vlm]",
    "mineru[pipeline]",
    "mineru[api]",
    "mineru[gradio]",
]
# Aggregate extra: everything in `core` plus the sglang extra.
all = [
    "mineru[core]",
    "mineru[sglang]",
]
# Variant of `pipeline` with most packages pinned to exact versions or given
# tight upper bounds, plus `albumentations`.
# NOTE(review): the name suggests it targets older Linux systems - confirm.
pipeline_old_linux = [
    "matplotlib>=3.10,<=3.10.1",
    "ultralytics>=8.3.48,<=8.3.104",
    "doclayout_yolo==0.0.4",
    "dill==0.3.8",
    "PyYAML==6.0.2",
    "ftfy==6.3.1",
    "openai==1.71.0",
    "shapely==2.1.0",
    "pyclipper==1.3.0.post6",
    "omegaconf==2.3.0",
    "albumentations==1.4.20",
    "rapid_table==1.0.3",
    "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
    "torchvision",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
]

# Links published with the package metadata.
[project.urls]
homepage = "https://mineru.net/"
documentation = "https://opendatalab.github.io/MinerU/"
repository = "https://github.com/opendatalab/MinerU"
issues = "https://github.com/opendatalab/MinerU/issues"

# Console commands created on install, each written as "module:callable".
[project.scripts]
# The module path is spelled out in full (not "mineru.cli:client.main") so
# loaders that import the module and then look up attributes do not depend on
# `mineru.cli` having already imported its `client` submodule.
mineru = "mineru.cli.client:main"
mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"

# Source of the `version` field that [project] declares as dynamic.
[tool.setuptools.dynamic]
version = { attr = "mineru.version.__version__" }

# Package discovery: only packages whose names match `mineru*` are included,
# and namespace-package discovery is turned off.
[tool.setuptools.packages.find]
include = ["mineru*"]
namespaces = false

# Non-Python files shipped inside the listed packages. Keys are package names;
# the second key must stay quoted because it contains dots.
[tool.setuptools.package-data]
"mineru" = ["resources/**"]
"mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils" = ["resources/**"]

# General setuptools switches.
[tool.setuptools]
include-package-data = true
zip-safe = false

# Default pytest options: `-s` disables output capture, and the `--cov` flags
# (from pytest-cov) measure the `mineru` package and write an HTML report.
[tool.pytest.ini_options]
addopts = "-s --cov=mineru --cov-report html"

# coverage.py run settings.
[tool.coverage.run]
# Arguments used when `coverage run` is invoked without any of its own.
command_line = "-m pytest tests/unittest/test_e2e.py"
source = ["mineru/"]
# File patterns left out of measurement.
omit = [
    "*/vlm_sglang_model/*",
    "*/gradio_app.py",
    "*/models_download.py",
    "*/fast_api.py",
    "*/cli/client.py",
    "*/sglang_engine_predictor.py",
    "*/vlm_sglang_server.py",
    "*/cli_parser.py",
    "*/run_async.py",
]

# Output directory for the HTML coverage report.
[tool.coverage.html]
directory = "htmlcov"

# coverage.py report settings.
[tool.coverage.report]
# Regexes for source lines to exclude from the report, added on top of
# coverage.py's default exclusions. Literal (single-quoted) strings keep the
# backslashes unescaped.
exclude_also = [
    'def __repr__',
    'if self.debug:',
    'if settings.DEBUG',
    'raise AssertionError',
    'raise NotImplementedError',
    'if 0:',
    'if __name__ == .__main__.:',
    'if TYPE_CHECKING:',
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]