diff --git a/mineru/cli/common.py b/mineru/cli/common.py index 23aaeec7..4eae0de8 100644 --- a/mineru/cli/common.py +++ b/mineru/cli/common.py @@ -17,8 +17,6 @@ from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze -from mineru.backend.hybrid.hybrid_analyze import doc_analyze as hybrid_doc_analyze -from mineru.backend.hybrid.hybrid_analyze import aio_doc_analyze as aio_hybrid_doc_analyze from mineru.utils.pdf_page_id import get_end_page_id if os.getenv("MINERU_LMDEPLOY_DEVICE", "") == "maca": @@ -326,6 +324,7 @@ def _process_hybrid( server_url=None, **kwargs, ): + from mineru.backend.hybrid.hybrid_analyze import doc_analyze as hybrid_doc_analyze """同步处理hybrid后端逻辑""" if not backend.endswith("client"): server_url = None @@ -378,8 +377,8 @@ async def _async_process_hybrid( server_url=None, **kwargs, ): + from mineru.backend.hybrid.hybrid_analyze import aio_doc_analyze as aio_hybrid_doc_analyze """异步处理hybrid后端逻辑""" - if not backend.endswith("client"): server_url = None diff --git a/mineru/utils/pdf_image_tools.py b/mineru/utils/pdf_image_tools.py index 8ff7bdba..591798da 100644 --- a/mineru/utils/pdf_image_tools.py +++ b/mineru/utils/pdf_image_tools.py @@ -232,13 +232,17 @@ def images_bytes_to_pdf_bytes(image_bytes): # 载入并转换所有图像为 RGB 模式 image = Image.open(BytesIO(image_bytes)) # 根据 EXIF 信息自动转正(处理手机拍摄的带 Orientation 标记的图片) - ImageOps.exif_transpose(image, in_place=True) + image = ImageOps.exif_transpose(image) or image # 只在必要时转换 if image.mode != "RGB": image = image.convert("RGB") # 第一张图保存为 PDF,其余追加 - image.save(pdf_buffer, format="PDF", save_all=True) + image.save( + pdf_buffer, + format="PDF", + # save_all=True + ) # 获取 PDF bytes 并重置指针(可选) pdf_bytes = pdf_buffer.getvalue() diff --git a/pyproject.toml b/pyproject.toml index 2c604849..11a25f92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "click>=8.1.7", "loguru>=0.7.2", "numpy>=1.21.6", - "pdfminer.six==20250506", + "pdfminer.six==20251230", "tqdm>=4.67.1", "requests", "httpx",