Merge pull request #4301 from myhloli/dev

Dev
This commit is contained in:
Xiaomeng Zhao
2026-01-06 14:10:08 +08:00
committed by GitHub
3 changed files with 9 additions and 6 deletions

View File

@@ -17,8 +17,6 @@ from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze
from mineru.backend.hybrid.hybrid_analyze import doc_analyze as hybrid_doc_analyze
from mineru.backend.hybrid.hybrid_analyze import aio_doc_analyze as aio_hybrid_doc_analyze
from mineru.utils.pdf_page_id import get_end_page_id
if os.getenv("MINERU_LMDEPLOY_DEVICE", "") == "maca":
@@ -326,6 +324,7 @@ def _process_hybrid(
server_url=None,
**kwargs,
):
from mineru.backend.hybrid.hybrid_analyze import doc_analyze as hybrid_doc_analyze
"""同步处理hybrid后端逻辑"""
if not backend.endswith("client"):
server_url = None
@@ -378,8 +377,8 @@ async def _async_process_hybrid(
server_url=None,
**kwargs,
):
from mineru.backend.hybrid.hybrid_analyze import aio_doc_analyze as aio_hybrid_doc_analyze
"""异步处理hybrid后端逻辑"""
if not backend.endswith("client"):
server_url = None

View File

@@ -232,13 +232,17 @@ def images_bytes_to_pdf_bytes(image_bytes):
# 载入并转换所有图像为 RGB 模式
image = Image.open(BytesIO(image_bytes))
# 根据 EXIF 信息自动转正(处理手机拍摄的带 Orientation 标记的图片)
ImageOps.exif_transpose(image, in_place=True)
image = ImageOps.exif_transpose(image) or image
# 只在必要时转换
if image.mode != "RGB":
image = image.convert("RGB")
# 第一张图保存为 PDF其余追加
image.save(pdf_buffer, format="PDF", save_all=True)
image.save(
pdf_buffer,
format="PDF",
# save_all=True
)
# 获取 PDF bytes 并重置指针(可选)
pdf_bytes = pdf_buffer.getvalue()

View File

@@ -21,7 +21,7 @@ dependencies = [
"click>=8.1.7",
"loguru>=0.7.2",
"numpy>=1.21.6",
"pdfminer.six==20250506",
"pdfminer.six==20251230",
"tqdm>=4.67.1",
"requests",
"httpx",