Compare commits

...

3 Commits

Author SHA1 Message Date
许瑞
cb1b02e716 feat: disable auto include table title 2024-03-26 16:46:05 +08:00
许瑞
f0c463ed6d Merge branch 'master' of https://github.com/myhloli/Magic-PDF 2024-03-26 10:17:05 +08:00
许瑞
efed5faa53 feat: modify foot note bbox tmp 2024-03-23 14:34:25 +08:00
2 changed files with 3 additions and 3 deletions

View File

@@ -220,7 +220,7 @@ def parse_pdf_for_train(
# 解析表格并对table_bboxes进行位置的微调,防止表格周围的文字被截断
table_bboxes = parse_tables(page_id, page, model_output_json)
table_bboxes = fix_tables(
- page, table_bboxes, include_table_title=True, scan_line_num=2
+ page, table_bboxes, include_table_title=False, scan_line_num=2
) # 修正
table_bboxes = fix_table_text_block(
text_raw_blocks, table_bboxes

View File

@@ -54,8 +54,8 @@ def convert_to_train_format(jso: dict) -> []:
n_bbox = {"category_id": 10, "bbox": inter_equation["bbox"]}
bboxes.append(n_bbox)
- for footnote in v['bak_footer_note_bboxes']:
- n_bbox = {"category_id": 5, "bbox": footnote["bbox"]}
+ for footnote_bbox in v["bak_footer_note_bboxes"]:
+ n_bbox = {"category_id": 5, "bbox": list(footnote_bbox)}
bboxes.append(n_bbox)
info["bboxes"] = bboxes