feat: enhance DOCX processing by refining image handling and improving logging for inference timing

This commit is contained in:
myhloli
2026-01-06 20:04:06 +08:00
parent 0cbe965d97
commit ad175df3d2
3 changed files with 13 additions and 5 deletions

View File

@@ -218,7 +218,7 @@ def _process_output(
make_func = pipeline_union_make
elif process_mode == "vlm":
make_func = vlm_union_make
-elif process_mode == "office":
+elif process_mode in office_suffixes:
make_func = office_union_make
else:
raise Exception(f"Unknown process_mode: {process_mode}")

View File

@@ -17,12 +17,13 @@ def office_docx_analyze(
results = convert_binary(file_stream)
infer_time = round(time.time() - infer_start, 2)
-logger.debug(f"infer finished, cost: {infer_time}, speed: {round(len(results) / infer_time, 3)} page/s")
+safe_time = max(infer_time, 0.01)
+logger.debug(f"infer finished, cost: {infer_time}, speed: {round(len(results) / safe_time, 3)} page/s")
# middle_json = result_to_middle_json(
# results,
# image_writer,
# )
-middle_json= []
+middle_json= {"pdf_info": results}
return middle_json, results

View File

@@ -5,7 +5,7 @@ from pathlib import Path
from typing import BinaryIO, Optional, Union, Any, Final
import logging
-from PIL import Image
+from PIL import Image, WmfImagePlugin
from loguru import logger
from docx import Document
from docx.oxml.xmlchemy import BaseOxmlElement
@@ -466,7 +466,14 @@ class DocxConverter:
else:
image_bytes = BytesIO(image_data)
pil_image = Image.open(image_bytes)
-img_base64 = image_to_b64str(pil_image)
+if isinstance(pil_image, WmfImagePlugin.WmfStubImageFile):
+    logger.warning(f"Skipping WMF image, size: {pil_image.size}")
+    placeholder = Image.new('RGB', pil_image.size, (240, 240, 240))
+    img_base64 = image_to_b64str(placeholder)
+else:
+    if pil_image.mode != "RGB":
+        pil_image = pil_image.convert("RGB")
+    img_base64 = image_to_b64str(pil_image)
image_block = {
"type": BlockType.IMAGE,
"bbox": [0, 0, 0, 0],