Mirror of https://github.com/opendatalab/MinerU.git (synced 2026-03-27 11:08:32 +07:00)
feat: add support for SEAL block type in bbox drawing and update image handling in markdown content
This commit is contained in:
@@ -32,7 +32,14 @@ def make_blocks_to_markdown(paras_of_layout,
|
||||
if para_block['lines'][0]['spans'][0].get('content', ''):
|
||||
para_text = merge_para_with_text(para_block)
|
||||
else:
|
||||
para_text += f""
|
||||
para_text = f""
|
||||
elif para_type == BlockType.SEAL:
|
||||
if len(para_block['lines']) == 0 or len(para_block['lines'][0]['spans']) == 0:
|
||||
continue
|
||||
para_text = f""
|
||||
if para_block['lines'][0]['spans'][0].get('content', []):
|
||||
content = " ".join(para_block['lines'][0]['spans'][0]['content'])
|
||||
para_text += f" \n{content}"
|
||||
elif para_type == BlockType.IMAGE:
|
||||
if mode == MakeMode.NLP_MD:
|
||||
continue
|
||||
|
||||
@@ -188,6 +188,8 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
|
||||
elif nested_block["type"] == BlockType.CHART_FOOTNOTE:
|
||||
bbox = nested_block["bbox"]
|
||||
imgs_footnote.append(bbox)
|
||||
elif block["type"] == BlockType.SEAL:
|
||||
imgs_body.append(bbox)
|
||||
elif block["type"] == BlockType.TITLE:
|
||||
titles.append(bbox)
|
||||
elif block["type"] in [BlockType.TEXT, BlockType.REF_TEXT, BlockType.ABSTRACT]:
|
||||
|
||||
Reference in New Issue
Block a user