mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
feat: enhance DOCX processing by refining image handling and improving logging for inference timing
This commit is contained in:
@@ -218,7 +218,7 @@ def _process_output(
|
||||
make_func = pipeline_union_make
|
||||
elif process_mode == "vlm":
|
||||
make_func = vlm_union_make
|
||||
elif process_mode == "office":
|
||||
elif process_mode in office_suffixes:
|
||||
make_func = office_union_make
|
||||
else:
|
||||
raise Exception(f"Unknown process_mode: {process_mode}")
|
||||
|
||||
@@ -17,12 +17,13 @@ def office_docx_analyze(
|
||||
results = convert_binary(file_stream)
|
||||
|
||||
infer_time = round(time.time() - infer_start, 2)
|
||||
logger.debug(f"infer finished, cost: {infer_time}, speed: {round(len(results) / infer_time, 3)} page/s")
|
||||
safe_time = max(infer_time, 0.01)
|
||||
logger.debug(f"infer finished, cost: {infer_time}, speed: {round(len(results) / safe_time, 3)} page/s")
|
||||
|
||||
# middle_json = result_to_middle_json(
|
||||
# results,
|
||||
# image_writer,
|
||||
# )
|
||||
middle_json= []
|
||||
middle_json= {"pdf_info": results}
|
||||
|
||||
return middle_json, results
|
||||
@@ -5,7 +5,7 @@ from pathlib import Path
|
||||
from typing import BinaryIO, Optional, Union, Any, Final
|
||||
|
||||
import logging
|
||||
from PIL import Image
|
||||
from PIL import Image, WmfImagePlugin
|
||||
from loguru import logger
|
||||
from docx import Document
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||
@@ -466,7 +466,14 @@ class DocxConverter:
|
||||
else:
|
||||
image_bytes = BytesIO(image_data)
|
||||
pil_image = Image.open(image_bytes)
|
||||
img_base64 = image_to_b64str(pil_image)
|
||||
if isinstance(pil_image, WmfImagePlugin.WmfStubImageFile):
|
||||
logger.warning(f"Skipping WMF image, size: {pil_image.size}")
|
||||
placeholder = Image.new('RGB', pil_image.size, (240, 240, 240))
|
||||
img_base64 = image_to_b64str(placeholder)
|
||||
else:
|
||||
if pil_image.mode != "RGB":
|
||||
pil_image = pil_image.convert("RGB")
|
||||
img_base64 = image_to_b64str(pil_image)
|
||||
image_block = {
|
||||
"type": BlockType.IMAGE,
|
||||
"bbox": [0, 0, 0, 0],
|
||||
|
||||
Reference in New Issue
Block a user