mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
Compare commits
2 Commits
magic_pdf-
...
magic_pdf-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
efed5faa53 | ||
|
|
05161c6e62 |
@@ -531,6 +531,7 @@ def parse_pdf_for_train(
|
||||
page_info["bak_page_no_bboxes"] = page_no_bboxs
|
||||
page_info["bak_header_bboxes"] = header_bboxs
|
||||
page_info["bak_footer_bboxes"] = footer_bboxs
|
||||
page_info["bak_footer_note_bboxes"] = footnote_bboxes_tmp
|
||||
|
||||
pdf_info_dict[f"page_{page_id}"] = page_info
|
||||
|
||||
|
||||
@@ -54,8 +54,8 @@ def convert_to_train_format(jso: dict) -> []:
|
||||
n_bbox = {"category_id": 10, "bbox": inter_equation["bbox"]}
|
||||
bboxes.append(n_bbox)
|
||||
|
||||
for footnote in v['footnote_bboxes_tmp']:
|
||||
n_bbox = {"category_id": 5, "bbox": footnote["bbox"]}
|
||||
for footnote_bbox in v["bak_footer_note_bboxes"]:
|
||||
n_bbox = {"category_id": 5, "bbox": list(footnote_bbox)}
|
||||
bboxes.append(n_bbox)
|
||||
|
||||
info["bboxes"] = bboxes
|
||||
|
||||
Reference in New Issue
Block a user