Compare commits

..

14 Commits

Author SHA1 Message Date
myhloli
31f368ab85 Update version.py with new version 2026-03-31 19:34:02 +00:00
Xiaomeng Zhao
2c65149062 Merge pull request #4706 from opendatalab/dev
3.0.5
2026-04-01 03:23:25 +08:00
Xiaomeng Zhao
e976ca2af0 Merge pull request #4705 from myhloli/dev
feat: allow custom zip filename for response in FastAPI file handling
2026-04-01 03:21:10 +08:00
myhloli
87a14040c0 feat: allow custom zip filename for response in FastAPI file handling 2026-04-01 03:12:46 +08:00
Xiaomeng Zhao
739c6343b6 Merge pull request #4704 from opendatalab/dev
3.0.5
2026-04-01 02:46:04 +08:00
Xiaomeng Zhao
11a9a9465e Merge pull request #4703 from myhloli/dev
Dev
2026-04-01 02:44:53 +08:00
myhloli
b583702df1 fix: improve shutdown handling for FastAPI child process on Windows 2026-04-01 02:28:22 +08:00
myhloli
1ca160fdc2 refactor: replace PaddingSameAsPaddleMaxPool2d with torch's MaxPool2d and improve forward method structure 2026-03-31 23:50:52 +08:00
myhloli
93d5251e18 feat: use Annotated for request form parameters in parse_request_form 2026-03-31 23:19:09 +08:00
myhloli
29f767029e feat: add custom JSON schema for file upload in Swagger UI 2026-03-31 22:56:55 +08:00
myhloli
39b903f029 fix: update sys_platform identifier for Windows in pyproject.toml 2026-03-31 22:21:33 +08:00
myhloli
3d508abfd1 feat: add albumentations dependency to pyproject.toml 2026-03-31 20:11:24 +08:00
Xiaomeng Zhao
b9485f1014 Merge pull request #4695 from opendatalab/master
master->dev
2026-03-31 01:51:59 +08:00
myhloli
5869af336b Update version.py with new version 2026-03-30 17:49:00 +00:00
5 changed files with 139 additions and 99 deletions

View File

@@ -110,6 +110,10 @@ class LocalAPIServer:
self.process: subprocess.Popen[bytes] | None = None
self._atexit_registered = False
self.extra_cli_args = tuple(extra_cli_args)
# On Windows, the temporary FastAPI child process can stall during parsing
# startup when launched with stdin=PIPE and an EOF-based shutdown watcher.
# Use explicit process termination there instead of stdin-driven shutdown.
self._use_stdin_shutdown_watcher = os.name != "nt"
def start(self) -> str:
if self.process is not None:
@@ -124,7 +128,12 @@ class LocalAPIServer:
read_max_concurrent_requests(default=DEFAULT_MAX_CONCURRENT_REQUESTS)
)
env["MINERU_API_DISABLE_ACCESS_LOG"] = "1"
env["MINERU_API_SHUTDOWN_ON_STDIN_EOF"] = "1"
if self._use_stdin_shutdown_watcher:
env["MINERU_API_SHUTDOWN_ON_STDIN_EOF"] = "1"
stdin_target = subprocess.PIPE
else:
env.pop("MINERU_API_SHUTDOWN_ON_STDIN_EOF", None)
stdin_target = subprocess.DEVNULL
self.output_root.mkdir(parents=True, exist_ok=True)
command = [
@@ -141,7 +150,7 @@ class LocalAPIServer:
command,
cwd=os.getcwd(),
env=env,
stdin=subprocess.PIPE,
stdin=stdin_target,
)
if not self._atexit_registered:
@@ -154,23 +163,30 @@ class LocalAPIServer:
self.process = None
try:
if process is not None and process.poll() is None:
if process.stdin is not None and not process.stdin.closed:
process.stdin.close()
try:
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
except subprocess.TimeoutExpired:
logger.debug(
"Local mineru-api did not stop after stdin EOF within {}s. Falling back to SIGTERM.",
LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS,
)
process.terminate()
if self._use_stdin_shutdown_watcher:
if process.stdin is not None and not process.stdin.closed:
process.stdin.close()
try:
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
return
except subprocess.TimeoutExpired:
pass
process.kill()
logger.debug(
"Local mineru-api did not stop after stdin EOF within {}s. Falling back to SIGTERM.",
LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS,
)
else:
logger.debug(
"Stopping local mineru-api with process termination on Windows."
)
process.terminate()
try:
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
return
except subprocess.TimeoutExpired:
pass
process.kill()
process.wait(timeout=LOCAL_API_SHUTDOWN_TIMEOUT_SECONDS)
finally:
if self._atexit_registered:
try:

View File

@@ -12,7 +12,7 @@ from contextlib import asynccontextmanager, suppress
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional
from typing import Annotated, Any, Optional
import click
import uvicorn
@@ -81,6 +81,11 @@ FILE_PARSE_TASK_ID_HEADER = "X-MinerU-Task-Id"
FILE_PARSE_TASK_STATUS_HEADER = "X-MinerU-Task-Status"
FILE_PARSE_TASK_STATUS_URL_HEADER = "X-MinerU-Task-Status-Url"
FILE_PARSE_TASK_RESULT_URL_HEADER = "X-MinerU-Task-Result-Url"
SWAGGER_UI_FILE_ARRAY_SCHEMA_EXTRA = {
# Swagger UI 5 currently fails to render a usable multi-file picker when
# FastAPI emits OpenAPI 3.1 byte arrays with contentMediaType.
"items": {"type": "string", "format": "binary"}
}
# 并发控制器
_request_semaphore: Optional[asyncio.Semaphore] = None
@@ -609,6 +614,7 @@ def build_result_response(
return_images: bool,
response_format_zip: bool,
return_original_file: bool,
zip_filename: str = "results.zip",
) -> Response:
if response_format_zip:
zip_path = create_result_zip(
@@ -627,7 +633,7 @@ def build_result_response(
return FileResponse(
path=zip_path,
media_type="application/zip",
filename="results.zip",
filename=zip_filename,
status_code=status_code,
)
@@ -683,6 +689,7 @@ def build_sync_file_parse_response(
return_images=task.return_images,
response_format_zip=task.response_format_zip,
return_original_file=task.return_original_file,
zip_filename=f"{task.task_id}.zip",
)
response.headers[FILE_PARSE_TASK_ID_HEADER] = task.task_id
response.headers[FILE_PARSE_TASK_STATUS_HEADER] = task.status
@@ -713,12 +720,17 @@ def build_sync_file_parse_response(
async def parse_request_form(
files: list[UploadFile] = File(
..., description="Upload pdf or image files for parsing"
),
lang_list: list[str] = Form(
["ch"],
description="""(Adapted only for pipeline and hybrid backend)Input the languages in the pdf to improve OCR accuracy.Options:
files: Annotated[
list[UploadFile],
File(
description="Upload pdf or image files for parsing",
json_schema_extra=SWAGGER_UI_FILE_ARRAY_SCHEMA_EXTRA,
),
],
lang_list: Annotated[
list[str],
Form(
description="""(Adapted only for pipeline and hybrid backend)Input the languages in the pdf to improve OCR accuracy.Options:
- ch: Chinese, English, Chinese Traditional.
- ch_lite: Chinese, English, Chinese Traditional, Japanese.
- ch_server: Chinese, English, Chinese Traditional, Japanese.
@@ -737,59 +749,84 @@ async def parse_request_form(
- cyrillic: Russian, Belarusian, Ukrainian, Serbian (Cyrillic), Bulgarian, Mongolian, Abkhazian, Adyghe, Kabardian, Avar, Dargin, Ingush, Chechen, Lak, Lezgin, Tabasaran, Kazakh, Kyrgyz, Tajik, Macedonian, Tatar, Chuvash, Bashkir, Malian, Moldovan, Udmurt, Komi, Ossetian, Buryat, Kalmyk, Tuvan, Sakha, Karakalpak, English.
- devanagari: Hindi, Marathi, Nepali, Bihari, Maithili, Angika, Bhojpuri, Magahi, Santali, Newari, Konkani, Sanskrit, Haryanvi, English.
""",
),
backend: str = Form(
"hybrid-auto-engine",
description="""The backend for parsing:
),
] = ["ch"],
backend: Annotated[
str,
Form(
description="""The backend for parsing:
- pipeline: More general, supports multiple languages, hallucination-free.
- vlm-auto-engine: High accuracy via local computing power, supports Chinese and English documents only.
- vlm-http-client: High accuracy via remote computing power(client suitable for openai-compatible servers), supports Chinese and English documents only.
- hybrid-auto-engine: Next-generation high accuracy solution via local computing power, supports multiple languages.
- hybrid-http-client: High accuracy via remote computing power but requires a little local computing power(client suitable for openai-compatible servers), supports multiple languages.""",
),
parse_method: str = Form(
"auto",
description="""(Adapted only for pipeline and hybrid backend)The method for parsing PDF:
),
] = "hybrid-auto-engine",
parse_method: Annotated[
str,
Form(
description="""(Adapted only for pipeline and hybrid backend)The method for parsing PDF:
- auto: Automatically determine the method based on the file type
- txt: Use text extraction method
- ocr: Use OCR method for image-based PDFs
""",
),
formula_enable: bool = Form(True, description="Enable formula parsing."),
table_enable: bool = Form(True, description="Enable table parsing."),
server_url: Optional[str] = Form(
None,
description="(Adapted only for <vlm/hybrid>-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000",
),
return_md: bool = Form(True, description="Return markdown content in response"),
return_middle_json: bool = Form(
False, description="Return middle JSON in response"
),
return_model_output: bool = Form(
False, description="Return model output JSON in response"
),
return_content_list: bool = Form(
False, description="Return content list JSON in response"
),
return_images: bool = Form(
False, description="Return extracted images in response"
),
response_format_zip: bool = Form(
False, description="Return results as a ZIP file instead of JSON"
),
return_original_file: bool = Form(
False,
description=(
"Include the processed original input file in the ZIP result; "
"ignored unless response_format_zip=true"
),
),
start_page_id: int = Form(
0, description="The starting page for PDF parsing, beginning from 0"
),
end_page_id: int = Form(
99999, description="The ending page for PDF parsing, beginning from 0"
),
] = "auto",
formula_enable: Annotated[
bool,
Form(description="Enable formula parsing."),
] = True,
table_enable: Annotated[
bool,
Form(description="Enable table parsing."),
] = True,
server_url: Annotated[
Optional[str],
Form(
description="(Adapted only for <vlm/hybrid>-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000",
),
] = None,
return_md: Annotated[
bool,
Form(description="Return markdown content in response"),
] = True,
return_middle_json: Annotated[
bool,
Form(description="Return middle JSON in response"),
] = False,
return_model_output: Annotated[
bool,
Form(description="Return model output JSON in response"),
] = False,
return_content_list: Annotated[
bool,
Form(description="Return content list JSON in response"),
] = False,
return_images: Annotated[
bool,
Form(description="Return extracted images in response"),
] = False,
response_format_zip: Annotated[
bool,
Form(description="Return results as a ZIP file instead of JSON"),
] = False,
return_original_file: Annotated[
bool,
Form(
description=(
"Include the processed original input file in the ZIP result; "
"ignored unless response_format_zip=true"
),
),
] = False,
start_page_id: Annotated[
int,
Form(description="The starting page for PDF parsing, beginning from 0"),
] = 0,
end_page_id: Annotated[
int,
Form(description="The ending page for PDF parsing, beginning from 0"),
] = 99999,
) -> ParseRequestOptions:
effective_return_original_file = return_original_file and response_format_zip
return ParseRequestOptions(
@@ -1295,7 +1332,9 @@ def get_task_manager() -> AsyncTaskManager:
async def parse_pdf(
http_request: Request,
background_tasks: BackgroundTasks,
request_options: ParseRequestOptions = Depends(parse_request_form),
request_options: Annotated[
ParseRequestOptions, Depends(parse_request_form)
],
):
task = await create_async_parse_task(request_options)
request_options = None
@@ -1340,7 +1379,9 @@ async def parse_pdf(
)
async def submit_parse_task(
http_request: Request,
request_options: ParseRequestOptions = Depends(parse_request_form),
request_options: Annotated[
ParseRequestOptions, Depends(parse_request_form)
],
):
task_manager = get_task_manager()
task = await create_async_parse_task(request_options)
@@ -1399,6 +1440,7 @@ async def get_async_task_result(
return_images=task.return_images,
response_format_zip=task.response_format_zip,
return_original_file=task.return_original_file,
zip_filename=f"{task.task_id}.zip",
)

View File

@@ -959,23 +959,6 @@ class LightConvBNAct(TheseusLayer):
return x
class PaddingSameAsPaddleMaxPool2d(torch.nn.Module):
    """Max pooling that reproduces Paddle/TensorFlow "SAME" padding.

    Zero-pads each spatial dimension so the output size equals
    ``ceil(size / stride)``; when the required total padding is odd, the
    extra row/column goes on the bottom/right (matching Paddle). The
    padded tensor is then fed through a plain ``MaxPool2d``.
    """

    def __init__(self, kernel_size, stride=1):
        super().__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        # padding=0 here: all padding is applied manually in forward().
        self.pool = torch.nn.MaxPool2d(kernel_size, stride, padding=0, ceil_mode=True)

    @staticmethod
    def _total_same_pad(size, kernel, stride):
        """Total zero-padding needed along one spatial axis for SAME output."""
        needed = (math.ceil(size / stride) - 1) * stride + kernel - size
        return needed if needed > 0 else 0

    def forward(self, x):
        # Expects an NCHW tensor; only the spatial dims are padded.
        _, _, height, width = x.shape
        total_h = self._total_same_pad(height, self.kernel_size, self.stride)
        total_w = self._total_same_pad(width, self.kernel_size, self.stride)
        top = total_h // 2
        left = total_w // 2
        # F.pad order is (left, right, top, bottom) for the last two dims.
        padded = torch.nn.functional.pad(
            x, [left, total_w - left, top, total_h - top]
        )
        return self.pool(padded)
class StemBlock(TheseusLayer):
"""
StemBlock for PP-HGNetV2.
@@ -1011,7 +994,6 @@ class StemBlock(TheseusLayer):
out_channels=mid_channels // 2,
kernel_size=2,
stride=1,
padding="same",
use_lab=use_lab,
lr_mult=lr_mult,
)
@@ -1020,7 +1002,6 @@ class StemBlock(TheseusLayer):
out_channels=mid_channels,
kernel_size=2,
stride=1,
padding="same",
use_lab=use_lab,
lr_mult=lr_mult,
)
@@ -1040,20 +1021,20 @@ class StemBlock(TheseusLayer):
use_lab=use_lab,
lr_mult=lr_mult,
)
self.pool = PaddingSameAsPaddleMaxPool2d(
kernel_size=2, stride=1,
)
self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=1, ceil_mode=True)
def forward(self, x):
x = self.stem1(x)
x2 = self.stem2a(x)
x2 = self.stem2b(x2)
x1 = self.pool(x)
x = torch.cat([x1, x2], 1)
x = self.stem3(x)
x = self.stem4(x)
embedding = self.stem1(x)
embedding = F.pad(embedding, (0, 1, 0, 1))
emb_stem_2a = self.stem2a(embedding)
emb_stem_2a = F.pad(emb_stem_2a, (0, 1, 0, 1))
emb_stem_2a = self.stem2b(emb_stem_2a)
pooled_emb = self.pool(embedding)
embedding = torch.cat([pooled_emb, emb_stem_2a], 1)
embedding = self.stem3(embedding)
embedding = self.stem4(embedding)
return x
return embedding
class HGV2_Block(TheseusLayer):

View File

@@ -1 +1 @@
__version__ = "3.0.3"
__version__ = "3.0.5"

View File

@@ -87,6 +87,7 @@ pipeline = [
"torchvision",
"transformers>=4.57.3,<5.0.0",
"onnxruntime>1.17.0",
"albumentations>=2.0.8,<3",
]
gradio = [
"gradio>=5.49.1,!=6.0.0,!=6.0.1,!=6.0.2,!=6.1.0,!=6.2.0,!=6.3.0,!=6.4.0,!=6.5.0,!=6.5.1,!=6.6.0,!=6.7.0,<6.9.0",
@@ -101,7 +102,7 @@ all = [
"mineru[core]",
"mineru[mlx] ; sys_platform == 'darwin'",
"mineru[vllm] ; sys_platform == 'linux'",
"mineru[lmdeploy] ; sys_platform == 'windows'",
"mineru[lmdeploy] ; sys_platform == 'win32'",
]
[project.urls]