mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 02:58:54 +07:00
@@ -17,8 +17,6 @@ from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
|
||||
from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
|
||||
from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
|
||||
from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze
|
||||
from mineru.backend.hybrid.hybrid_analyze import doc_analyze as hybrid_doc_analyze
|
||||
from mineru.backend.hybrid.hybrid_analyze import aio_doc_analyze as aio_hybrid_doc_analyze
|
||||
from mineru.utils.pdf_page_id import get_end_page_id
|
||||
|
||||
if os.getenv("MINERU_LMDEPLOY_DEVICE", "") == "maca":
|
||||
@@ -326,6 +324,7 @@ def _process_hybrid(
|
||||
server_url=None,
|
||||
**kwargs,
|
||||
):
|
||||
from mineru.backend.hybrid.hybrid_analyze import doc_analyze as hybrid_doc_analyze
|
||||
"""同步处理hybrid后端逻辑"""
|
||||
if not backend.endswith("client"):
|
||||
server_url = None
|
||||
@@ -378,8 +377,8 @@ async def _async_process_hybrid(
|
||||
server_url=None,
|
||||
**kwargs,
|
||||
):
|
||||
from mineru.backend.hybrid.hybrid_analyze import aio_doc_analyze as aio_hybrid_doc_analyze
|
||||
"""异步处理hybrid后端逻辑"""
|
||||
|
||||
if not backend.endswith("client"):
|
||||
server_url = None
|
||||
|
||||
|
||||
@@ -232,13 +232,17 @@ def images_bytes_to_pdf_bytes(image_bytes):
|
||||
# 载入并转换所有图像为 RGB 模式
|
||||
image = Image.open(BytesIO(image_bytes))
|
||||
# 根据 EXIF 信息自动转正(处理手机拍摄的带 Orientation 标记的图片)
|
||||
ImageOps.exif_transpose(image, in_place=True)
|
||||
image = ImageOps.exif_transpose(image) or image
|
||||
# 只在必要时转换
|
||||
if image.mode != "RGB":
|
||||
image = image.convert("RGB")
|
||||
|
||||
# 第一张图保存为 PDF,其余追加
|
||||
image.save(pdf_buffer, format="PDF", save_all=True)
|
||||
image.save(
|
||||
pdf_buffer,
|
||||
format="PDF",
|
||||
# save_all=True
|
||||
)
|
||||
|
||||
# 获取 PDF bytes 并重置指针(可选)
|
||||
pdf_bytes = pdf_buffer.getvalue()
|
||||
|
||||
@@ -21,7 +21,7 @@ dependencies = [
|
||||
"click>=8.1.7",
|
||||
"loguru>=0.7.2",
|
||||
"numpy>=1.21.6",
|
||||
"pdfminer.six==20250506",
|
||||
"pdfminer.six==20251230",
|
||||
"tqdm>=4.67.1",
|
||||
"requests",
|
||||
"httpx",
|
||||
|
||||
Reference in New Issue
Block a user