mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-04-01 05:28:36 +07:00
Compare commits
15 Commits
mineru-3.0
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
887758e99d | ||
|
|
31f368ab85 | ||
|
|
2c65149062 | ||
|
|
e976ca2af0 | ||
|
|
87a14040c0 | ||
|
|
739c6343b6 | ||
|
|
11a9a9465e | ||
|
|
b583702df1 | ||
|
|
1ca160fdc2 | ||
|
|
93d5251e18 | ||
|
|
29f767029e | ||
|
|
39b903f029 | ||
|
|
3d508abfd1 | ||
|
|
b9485f1014 | ||
|
|
5869af336b |
@@ -110,6 +110,10 @@ class LocalAPIServer:
|
||||
self.process: subprocess.Popen[bytes] | None = None
|
||||
self._atexit_registered = False
|
||||
self.extra_cli_args = tuple(extra_cli_args)
|
||||
# On Windows, the temporary FastAPI child process can stall during parsing
|
||||
# startup when launched with stdin=PIPE and an EOF-based shutdown watcher.
|
||||
# Use explicit process termination there instead of stdin-driven shutdown.
|
||||
self._use_stdin_shutdown_watcher = os.name != "nt"
|
||||
|
||||
def start(self) -> str:
|
||||
if self.process is not None:
|
||||
@@ -124,7 +128,12 @@ class LocalAPIServer:
|
||||
read_max_concurrent_requests(default=DEFAULT_MAX_CONCURRENT_REQUESTS)
|
||||
)
|
||||
env["MINERU_API_DISABLE_ACCESS_LOG"] = "1"
|
||||
env["MINERU_API_SHUTDOWN_ON_STDIN_EOF"] = "1"
|
||||
if self._use_stdin_shutdown_watcher:
|
||||
env["MINERU_API_SHUTDOWN_ON_STDIN_EOF"] = "1"
|
||||
stdin_target = subprocess.PIPE
|
||||
else:
|
||||
env.pop("MINERU_API_SHUTDOWN_ON_STDIN_EOF", None)
|
||||
stdin_target = subprocess.DEVNULL
|
||||
self.output_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
command = [
|
||||
@@ -141,7 +150,7 @@ class LocalAPIServer:
|
||||
command,
|
||||
cwd=os.getcwd(),
|
||||
env=env,
|
||||
stdin=subprocess.PIPE,
|
||||
stdin=stdin_target,
|
||||
)
|
||||
|
||||
if not self._atexit_registered:
|
||||
@@ -154,23 +163,30 @@ class LocalAPIServer:
|
||||
self.process = None
|
||||
try:
|
||||
if process is not None and process.poll() is None:
|
||||
if process.stdin is not None and not process.stdin.closed:
|
||||
process.stdin.close()
|
||||
try:
|
||||
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.debug(
|
||||
"Local mineru-api did not stop after stdin EOF within {}s. Falling back to SIGTERM.",
|
||||
LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS,
|
||||
)
|
||||
process.terminate()
|
||||
if self._use_stdin_shutdown_watcher:
|
||||
if process.stdin is not None and not process.stdin.closed:
|
||||
process.stdin.close()
|
||||
try:
|
||||
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
|
||||
return
|
||||
except subprocess.TimeoutExpired:
|
||||
pass
|
||||
process.kill()
|
||||
logger.debug(
|
||||
"Local mineru-api did not stop after stdin EOF within {}s. Falling back to SIGTERM.",
|
||||
LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Stopping local mineru-api with process termination on Windows."
|
||||
)
|
||||
|
||||
process.terminate()
|
||||
try:
|
||||
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
|
||||
return
|
||||
except subprocess.TimeoutExpired:
|
||||
pass
|
||||
process.kill()
|
||||
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
|
||||
finally:
|
||||
if self._atexit_registered:
|
||||
try:
|
||||
|
||||
@@ -12,7 +12,7 @@ from contextlib import asynccontextmanager, suppress
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from typing import Annotated, Any, Optional
|
||||
|
||||
import click
|
||||
import uvicorn
|
||||
@@ -81,6 +81,11 @@ FILE_PARSE_TASK_ID_HEADER = "X-MinerU-Task-Id"
|
||||
FILE_PARSE_TASK_STATUS_HEADER = "X-MinerU-Task-Status"
|
||||
FILE_PARSE_TASK_STATUS_URL_HEADER = "X-MinerU-Task-Status-Url"
|
||||
FILE_PARSE_TASK_RESULT_URL_HEADER = "X-MinerU-Task-Result-Url"
|
||||
SWAGGER_UI_FILE_ARRAY_SCHEMA_EXTRA = {
|
||||
# Swagger UI 5 currently fails to render a usable multi-file picker when
|
||||
# FastAPI emits OpenAPI 3.1 byte arrays with contentMediaType.
|
||||
"items": {"type": "string", "format": "binary"}
|
||||
}
|
||||
|
||||
# 并发控制器
|
||||
_request_semaphore: Optional[asyncio.Semaphore] = None
|
||||
@@ -609,6 +614,7 @@ def build_result_response(
|
||||
return_images: bool,
|
||||
response_format_zip: bool,
|
||||
return_original_file: bool,
|
||||
zip_filename: str = "results.zip",
|
||||
) -> Response:
|
||||
if response_format_zip:
|
||||
zip_path = create_result_zip(
|
||||
@@ -627,7 +633,7 @@ def build_result_response(
|
||||
return FileResponse(
|
||||
path=zip_path,
|
||||
media_type="application/zip",
|
||||
filename="results.zip",
|
||||
filename=zip_filename,
|
||||
status_code=status_code,
|
||||
)
|
||||
|
||||
@@ -683,6 +689,7 @@ def build_sync_file_parse_response(
|
||||
return_images=task.return_images,
|
||||
response_format_zip=task.response_format_zip,
|
||||
return_original_file=task.return_original_file,
|
||||
zip_filename=f"{task.task_id}.zip",
|
||||
)
|
||||
response.headers[FILE_PARSE_TASK_ID_HEADER] = task.task_id
|
||||
response.headers[FILE_PARSE_TASK_STATUS_HEADER] = task.status
|
||||
@@ -713,12 +720,17 @@ def build_sync_file_parse_response(
|
||||
|
||||
|
||||
async def parse_request_form(
|
||||
files: list[UploadFile] = File(
|
||||
..., description="Upload pdf or image files for parsing"
|
||||
),
|
||||
lang_list: list[str] = Form(
|
||||
["ch"],
|
||||
description="""(Adapted only for pipeline and hybrid backend)Input the languages in the pdf to improve OCR accuracy.Options:
|
||||
files: Annotated[
|
||||
list[UploadFile],
|
||||
File(
|
||||
description="Upload pdf or image files for parsing",
|
||||
json_schema_extra=SWAGGER_UI_FILE_ARRAY_SCHEMA_EXTRA,
|
||||
),
|
||||
],
|
||||
lang_list: Annotated[
|
||||
list[str],
|
||||
Form(
|
||||
description="""(Adapted only for pipeline and hybrid backend)Input the languages in the pdf to improve OCR accuracy.Options:
|
||||
- ch: Chinese, English, Chinese Traditional.
|
||||
- ch_lite: Chinese, English, Chinese Traditional, Japanese.
|
||||
- ch_server: Chinese, English, Chinese Traditional, Japanese.
|
||||
@@ -737,59 +749,84 @@ async def parse_request_form(
|
||||
- cyrillic: Russian, Belarusian, Ukrainian, Serbian (Cyrillic), Bulgarian, Mongolian, Abkhazian, Adyghe, Kabardian, Avar, Dargin, Ingush, Chechen, Lak, Lezgin, Tabasaran, Kazakh, Kyrgyz, Tajik, Macedonian, Tatar, Chuvash, Bashkir, Malian, Moldovan, Udmurt, Komi, Ossetian, Buryat, Kalmyk, Tuvan, Sakha, Karakalpak, English.
|
||||
- devanagari: Hindi, Marathi, Nepali, Bihari, Maithili, Angika, Bhojpuri, Magahi, Santali, Newari, Konkani, Sanskrit, Haryanvi, English.
|
||||
""",
|
||||
),
|
||||
backend: str = Form(
|
||||
"hybrid-auto-engine",
|
||||
description="""The backend for parsing:
|
||||
),
|
||||
] = ["ch"],
|
||||
backend: Annotated[
|
||||
str,
|
||||
Form(
|
||||
description="""The backend for parsing:
|
||||
- pipeline: More general, supports multiple languages, hallucination-free.
|
||||
- vlm-auto-engine: High accuracy via local computing power, supports Chinese and English documents only.
|
||||
- vlm-http-client: High accuracy via remote computing power(client suitable for openai-compatible servers), supports Chinese and English documents only.
|
||||
- hybrid-auto-engine: Next-generation high accuracy solution via local computing power, supports multiple languages.
|
||||
- hybrid-http-client: High accuracy via remote computing power but requires a little local computing power(client suitable for openai-compatible servers), supports multiple languages.""",
|
||||
),
|
||||
parse_method: str = Form(
|
||||
"auto",
|
||||
description="""(Adapted only for pipeline and hybrid backend)The method for parsing PDF:
|
||||
),
|
||||
] = "hybrid-auto-engine",
|
||||
parse_method: Annotated[
|
||||
str,
|
||||
Form(
|
||||
description="""(Adapted only for pipeline and hybrid backend)The method for parsing PDF:
|
||||
- auto: Automatically determine the method based on the file type
|
||||
- txt: Use text extraction method
|
||||
- ocr: Use OCR method for image-based PDFs
|
||||
""",
|
||||
),
|
||||
formula_enable: bool = Form(True, description="Enable formula parsing."),
|
||||
table_enable: bool = Form(True, description="Enable table parsing."),
|
||||
server_url: Optional[str] = Form(
|
||||
None,
|
||||
description="(Adapted only for <vlm/hybrid>-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000",
|
||||
),
|
||||
return_md: bool = Form(True, description="Return markdown content in response"),
|
||||
return_middle_json: bool = Form(
|
||||
False, description="Return middle JSON in response"
|
||||
),
|
||||
return_model_output: bool = Form(
|
||||
False, description="Return model output JSON in response"
|
||||
),
|
||||
return_content_list: bool = Form(
|
||||
False, description="Return content list JSON in response"
|
||||
),
|
||||
return_images: bool = Form(
|
||||
False, description="Return extracted images in response"
|
||||
),
|
||||
response_format_zip: bool = Form(
|
||||
False, description="Return results as a ZIP file instead of JSON"
|
||||
),
|
||||
return_original_file: bool = Form(
|
||||
False,
|
||||
description=(
|
||||
"Include the processed original input file in the ZIP result; "
|
||||
"ignored unless response_format_zip=true"
|
||||
),
|
||||
),
|
||||
start_page_id: int = Form(
|
||||
0, description="The starting page for PDF parsing, beginning from 0"
|
||||
),
|
||||
end_page_id: int = Form(
|
||||
99999, description="The ending page for PDF parsing, beginning from 0"
|
||||
),
|
||||
] = "auto",
|
||||
formula_enable: Annotated[
|
||||
bool,
|
||||
Form(description="Enable formula parsing."),
|
||||
] = True,
|
||||
table_enable: Annotated[
|
||||
bool,
|
||||
Form(description="Enable table parsing."),
|
||||
] = True,
|
||||
server_url: Annotated[
|
||||
Optional[str],
|
||||
Form(
|
||||
description="(Adapted only for <vlm/hybrid>-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000",
|
||||
),
|
||||
] = None,
|
||||
return_md: Annotated[
|
||||
bool,
|
||||
Form(description="Return markdown content in response"),
|
||||
] = True,
|
||||
return_middle_json: Annotated[
|
||||
bool,
|
||||
Form(description="Return middle JSON in response"),
|
||||
] = False,
|
||||
return_model_output: Annotated[
|
||||
bool,
|
||||
Form(description="Return model output JSON in response"),
|
||||
] = False,
|
||||
return_content_list: Annotated[
|
||||
bool,
|
||||
Form(description="Return content list JSON in response"),
|
||||
] = False,
|
||||
return_images: Annotated[
|
||||
bool,
|
||||
Form(description="Return extracted images in response"),
|
||||
] = False,
|
||||
response_format_zip: Annotated[
|
||||
bool,
|
||||
Form(description="Return results as a ZIP file instead of JSON"),
|
||||
] = False,
|
||||
return_original_file: Annotated[
|
||||
bool,
|
||||
Form(
|
||||
description=(
|
||||
"Include the processed original input file in the ZIP result; "
|
||||
"ignored unless response_format_zip=true"
|
||||
),
|
||||
),
|
||||
] = False,
|
||||
start_page_id: Annotated[
|
||||
int,
|
||||
Form(description="The starting page for PDF parsing, beginning from 0"),
|
||||
] = 0,
|
||||
end_page_id: Annotated[
|
||||
int,
|
||||
Form(description="The ending page for PDF parsing, beginning from 0"),
|
||||
] = 99999,
|
||||
) -> ParseRequestOptions:
|
||||
effective_return_original_file = return_original_file and response_format_zip
|
||||
return ParseRequestOptions(
|
||||
@@ -1295,7 +1332,9 @@ def get_task_manager() -> AsyncTaskManager:
|
||||
async def parse_pdf(
|
||||
http_request: Request,
|
||||
background_tasks: BackgroundTasks,
|
||||
request_options: ParseRequestOptions = Depends(parse_request_form),
|
||||
request_options: Annotated[
|
||||
ParseRequestOptions, Depends(parse_request_form)
|
||||
],
|
||||
):
|
||||
task = await create_async_parse_task(request_options)
|
||||
request_options = None
|
||||
@@ -1340,7 +1379,9 @@ async def parse_pdf(
|
||||
)
|
||||
async def submit_parse_task(
|
||||
http_request: Request,
|
||||
request_options: ParseRequestOptions = Depends(parse_request_form),
|
||||
request_options: Annotated[
|
||||
ParseRequestOptions, Depends(parse_request_form)
|
||||
],
|
||||
):
|
||||
task_manager = get_task_manager()
|
||||
task = await create_async_parse_task(request_options)
|
||||
@@ -1399,6 +1440,7 @@ async def get_async_task_result(
|
||||
return_images=task.return_images,
|
||||
response_format_zip=task.response_format_zip,
|
||||
return_original_file=task.return_original_file,
|
||||
zip_filename=f"{task.task_id}.zip",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -959,23 +959,6 @@ class LightConvBNAct(TheseusLayer):
|
||||
return x
|
||||
|
||||
|
||||
class PaddingSameAsPaddleMaxPool2d(torch.nn.Module):
|
||||
def __init__(self, kernel_size, stride=1):
|
||||
super().__init__()
|
||||
self.kernel_size = kernel_size
|
||||
self.stride = stride
|
||||
self.pool = torch.nn.MaxPool2d(kernel_size, stride, padding=0, ceil_mode=True)
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.shape
|
||||
pad_h_total = max(0, (math.ceil(h / self.stride) - 1) * self.stride + self.kernel_size - h)
|
||||
pad_w_total = max(0, (math.ceil(w / self.stride) - 1) * self.stride + self.kernel_size - w)
|
||||
pad_h = pad_h_total // 2
|
||||
pad_w = pad_w_total // 2
|
||||
x = torch.nn.functional.pad(x, [pad_w, pad_w_total - pad_w, pad_h, pad_h_total - pad_h])
|
||||
return self.pool(x)
|
||||
|
||||
|
||||
class StemBlock(TheseusLayer):
|
||||
"""
|
||||
StemBlock for PP-HGNetV2.
|
||||
@@ -1011,7 +994,6 @@ class StemBlock(TheseusLayer):
|
||||
out_channels=mid_channels // 2,
|
||||
kernel_size=2,
|
||||
stride=1,
|
||||
padding="same",
|
||||
use_lab=use_lab,
|
||||
lr_mult=lr_mult,
|
||||
)
|
||||
@@ -1020,7 +1002,6 @@ class StemBlock(TheseusLayer):
|
||||
out_channels=mid_channels,
|
||||
kernel_size=2,
|
||||
stride=1,
|
||||
padding="same",
|
||||
use_lab=use_lab,
|
||||
lr_mult=lr_mult,
|
||||
)
|
||||
@@ -1040,20 +1021,20 @@ class StemBlock(TheseusLayer):
|
||||
use_lab=use_lab,
|
||||
lr_mult=lr_mult,
|
||||
)
|
||||
self.pool = PaddingSameAsPaddleMaxPool2d(
|
||||
kernel_size=2, stride=1,
|
||||
)
|
||||
self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=1, ceil_mode=True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.stem1(x)
|
||||
x2 = self.stem2a(x)
|
||||
x2 = self.stem2b(x2)
|
||||
x1 = self.pool(x)
|
||||
x = torch.cat([x1, x2], 1)
|
||||
x = self.stem3(x)
|
||||
x = self.stem4(x)
|
||||
embedding = self.stem1(x)
|
||||
embedding = F.pad(embedding, (0, 1, 0, 1))
|
||||
emb_stem_2a = self.stem2a(embedding)
|
||||
emb_stem_2a = F.pad(emb_stem_2a, (0, 1, 0, 1))
|
||||
emb_stem_2a = self.stem2b(emb_stem_2a)
|
||||
pooled_emb = self.pool(embedding)
|
||||
embedding = torch.cat([pooled_emb, emb_stem_2a], 1)
|
||||
embedding = self.stem3(embedding)
|
||||
embedding = self.stem4(embedding)
|
||||
|
||||
return x
|
||||
return embedding
|
||||
|
||||
|
||||
class HGV2_Block(TheseusLayer):
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "3.0.3"
|
||||
__version__ = "3.0.5"
|
||||
|
||||
@@ -87,6 +87,7 @@ pipeline = [
|
||||
"torchvision",
|
||||
"transformers>=4.57.3,<5.0.0",
|
||||
"onnxruntime>1.17.0",
|
||||
"albumentations>=2.0.8,<3",
|
||||
]
|
||||
gradio = [
|
||||
"gradio>=5.49.1,!=6.0.0,!=6.0.1,!=6.0.2,!=6.1.0,!=6.2.0,!=6.3.0,!=6.4.0,!=6.5.0,!=6.5.1,!=6.6.0,!=6.7.0,<6.9.0",
|
||||
@@ -101,7 +102,7 @@ all = [
|
||||
"mineru[core]",
|
||||
"mineru[mlx] ; sys_platform == 'darwin'",
|
||||
"mineru[vllm] ; sys_platform == 'linux'",
|
||||
"mineru[lmdeploy] ; sys_platform == 'windows'",
|
||||
"mineru[lmdeploy] ; sys_platform == 'win32'",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
Reference in New Issue
Block a user