mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
Compare commits
3 Commits
magic_pdf-
...
magic_pdf-
| Author | SHA1 | Date | |
|---|---|---|---|
| | cb1b02e716 | | |
| | f0c463ed6d | | |
| | efed5faa53 | | |
@@ -220,7 +220,7 @@ def parse_pdf_for_train(
|
||||
# 解析表格并对table_bboxes进行位置的微调,防止表格周围的文字被截断
|
||||
table_bboxes = parse_tables(page_id, page, model_output_json)
|
||||
table_bboxes = fix_tables(
|
||||
page, table_bboxes, include_table_title=True, scan_line_num=2
|
||||
page, table_bboxes, include_table_title=False, scan_line_num=2
|
||||
) # 修正
|
||||
table_bboxes = fix_table_text_block(
|
||||
text_raw_blocks, table_bboxes
|
||||
|
||||
@@ -54,8 +54,8 @@ def convert_to_train_format(jso: dict) -> []:
|
||||
n_bbox = {"category_id": 10, "bbox": inter_equation["bbox"]}
|
||||
bboxes.append(n_bbox)
|
||||
|
||||
for footnote in v['bak_footer_note_bboxes']:
|
||||
n_bbox = {"category_id": 5, "bbox": footnote["bbox"]}
|
||||
for footnote_bbox in v["bak_footer_note_bboxes"]:
|
||||
n_bbox = {"category_id": 5, "bbox": list(footnote_bbox)}
|
||||
bboxes.append(n_bbox)
|
||||
|
||||
info["bboxes"] = bboxes
|
||||
|
||||
Reference in New Issue
Block a user