# Mirror of https://github.com/opendatalab/MinerU.git
# Synced 2026-03-27 02:58:54 +07:00 (165 lines, 3.8 KiB, TOML)
# Build backend and the packages that must be installed before it can run.
[build-system]
requires = [
    "setuptools>=61.0",
    "wheel",
]
build-backend = "setuptools.build_meta"

# Core package metadata. The version is declared dynamic, so it is not
# written in this table.
[project]
name = "mineru"
dynamic = ["version"]
license = { text = "AGPL-3.0" }
description = "A practical tool for converting PDF to Markdown"
readme = "README.md"
requires-python = ">=3.10,<3.14"
keywords = [
    "magic-pdf",
    "mineru",
    "MinerU",
    "convert",
    "pdf",
    "markdown",
]
# One classifier for each minor version admitted by requires-python.
classifiers = [
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
# Requirements installed with every variant of the package; the heavier
# model and serving stacks are listed as optional extras instead.
dependencies = [
    "boto3>=1.28.43",
    "click>=8.1.7",
    "loguru>=0.7.2",
    "numpy>=1.21.6",
    "pdfminer.six>=20251230",
    "tqdm>=4.67.1",
    "requests",
    "httpx",
    "pillow>=11.0.0",
    "pypdfium2>=4.30.0",
    "pypdf>=5.6.0",
    "reportlab",
    "pdftext>=0.6.3",
    "modelscope>=1.26.0",
    "huggingface-hub>=0.32.4",
    "json-repair>=0.46.2",
    "opencv-python>=4.11.0.86",
    "fast-langdetect>=0.2.3,<0.3.0",
    "scikit-image>=0.25.0,<1.0.0",
    "openai>=1.70.0,<3",
    "beautifulsoup4>=4.13.5,<5",
    "magika>=0.6.2,<1.1.0",
    "mineru-vl-utils>=0.1.19.1,<1",
    "qwen-vl-utils>=0.0.14,<1",
]

# Optional extras. "core" and "all" are aggregates that refer back to the
# other extras of this same package.
[project.optional-dependencies]
# Test tooling on top of the "core" extra.
test = [
    "mineru[core]",
    "pytest",
    "pytest-cov",
    "coverage",
    "fuzzywuzzy",
]
vlm = [
    "torch>=2.6.0,<3",
    "transformers>=4.51.1,!=4.57.2,<5.0.0",
    "accelerate>=1.5.1",
]
vllm = [
    "vllm>=0.10.1.1,<0.12",
]
lmdeploy = [
    "lmdeploy>=0.10.2,<0.12",
]
mlx = [
    "mlx-vlm>=0.3.3,<0.4",
]
pipeline = [
    "matplotlib>=3.10,<4",
    "ultralytics>=8.3.48,<9",
    "doclayout_yolo==0.0.4",
    "dill>=0.3.8,<1",
    "PyYAML>=6.0.1,<7",
    "ftfy>=6.3.1,<7",
    "shapely>=2.0.7,<3",
    "pyclipper>=1.3.0,<2",
    "omegaconf>=2.3.0,<3",
    "torch>=2.6.0,<3",
    "torchvision",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
    "onnxruntime>1.17.0",
]
api = [
    "fastapi",
    "python-multipart",
    "uvicorn",
]
gradio = [
    "gradio==5.49.1",
    "gradio-pdf==0.0.22",
]
# Aggregate of the vlm, pipeline, api and gradio extras.
core = [
    "mineru[vlm]",
    "mineru[pipeline]",
    "mineru[api]",
    "mineru[gradio]",
]
# "core" plus one platform-specific backend selected by environment marker.
# The sys_platform marker takes the value of Python's sys.platform, which is
# "win32" on Windows (32- and 64-bit alike); a comparison against "windows"
# never matches, so the lmdeploy extra would be skipped on every platform.
all = [
    "mineru[core]",
    "mineru[mlx] ; sys_platform == 'darwin'",
    "mineru[vllm] ; sys_platform == 'linux'",
    "mineru[lmdeploy] ; sys_platform == 'win32'",
]

# Project links published with the package metadata.
[project.urls]
homepage = "https://mineru.net/"
documentation = "https://opendatalab.github.io/MinerU/"
repository = "https://github.com/opendatalab/MinerU"
issues = "https://github.com/opendatalab/MinerU/issues"

# Console commands created on install. Each key is the command name and each
# value is the "module:function" it invokes.
[project.scripts]
mineru = "mineru.cli.client:main"
# The three server commands are separate functions of one module.
mineru-vllm-server = "mineru.cli.vlm_server:vllm_server"
mineru-lmdeploy-server = "mineru.cli.vlm_server:lmdeploy_server"
mineru-openai-server = "mineru.cli.vlm_server:openai_server"
mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"

# General setuptools options. The sub-tables that follow configure version
# lookup, package discovery and packaged data files.
[tool.setuptools]
include-package-data = true
zip-safe = false

# The dynamic "version" field is read from the __version__ attribute of the
# mineru.version module.
[tool.setuptools.dynamic]
version = { attr = "mineru.version.__version__" }

# Automatic package discovery, limited to names matching "mineru*".
[tool.setuptools.packages.find]
include = ["mineru*"]
namespaces = false

# Data-file glob patterns, keyed by the package they are relative to. The
# second key contains dots and therefore has to stay quoted.
[tool.setuptools.package-data]
"mineru" = ["resources/**"]
"mineru.model.utils.pytorchocr.utils" = ["resources/**"]

# Default pytest command-line options: -s turns off output capture, and the
# --cov flags (pytest-cov) measure the mineru package and write an HTML report.
[tool.pytest.ini_options]
addopts = "-s --cov=mineru --cov-report html"

# Settings for "coverage run": the default command, the measured source tree
# and the files left out of measurement.
[tool.coverage.run]
command_line = "-m pytest tests/unittest/test_e2e.py"
source = ["mineru/"]
# NOTE(review): [project.scripts] points at mineru.cli.vlm_server; confirm
# that a vlm_vllm_server.py module still exists, otherwise that pattern
# matches nothing.
omit = [
    "*/gradio_app.py",
    "*/models_download.py",
    "*/fast_api.py",
    "*/cli/client.py",
    "*/vlm_vllm_server.py",
    "*/cli_parser.py",
]

# Output directory for the HTML coverage report.
[tool.coverage.html]
directory = "htmlcov"

# Regular expressions for lines excluded from the coverage report, added on
# top of coverage's default exclusions. They are TOML literal strings, so the
# backslashes reach the regex engine unchanged.
[tool.coverage.report]
exclude_also = [
    'def __repr__',
    'if self.debug:',
    'if settings.DEBUG',
    'raise AssertionError',
    'raise NotImplementedError',
    'if 0:',
    # Each "." stands in for either kind of quote around __main__.
    'if __name__ == .__main__.:',
    'if TYPE_CHECKING:',
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]