feat: add support for SEAL block type in bbox drawing and update image handling in markdown content

This commit is contained in:
myhloli
2026-03-21 03:33:34 +08:00
parent 01ab656487
commit 09fc22fcc2
2 changed files with 10 additions and 1 deletion

View File

@@ -32,7 +32,14 @@ def make_blocks_to_markdown(paras_of_layout,
if para_block['lines'][0]['spans'][0].get('content', ''):
para_text = merge_para_with_text(para_block)
else:
para_text += f"![]({img_buket_path}/{para_block['lines'][0]['spans'][0]['image_path']})"
para_text = f"![]({img_buket_path}/{para_block['lines'][0]['spans'][0]['image_path']})"
elif para_type == BlockType.SEAL:
if len(para_block['lines']) == 0 or len(para_block['lines'][0]['spans']) == 0:
continue
para_text = f"![]({img_buket_path}/{para_block['lines'][0]['spans'][0]['image_path']})"
if para_block['lines'][0]['spans'][0].get('content', []):
content = " ".join(para_block['lines'][0]['spans'][0]['content'])
para_text += f" \n{content}"
elif para_type == BlockType.IMAGE:
if mode == MakeMode.NLP_MD:
continue

View File

@@ -188,6 +188,8 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
elif nested_block["type"] == BlockType.CHART_FOOTNOTE:
bbox = nested_block["bbox"]
imgs_footnote.append(bbox)
elif block["type"] == BlockType.SEAL:
imgs_body.append(bbox)
elif block["type"] == BlockType.TITLE:
titles.append(bbox)
elif block["type"] in [BlockType.TEXT, BlockType.REF_TEXT, BlockType.ABSTRACT]: