diff --git a/magic_pdf/pdf_parse_for_train.py b/magic_pdf/pdf_parse_for_train.py index 28f8c4a6..92b64e28 100644 --- a/magic_pdf/pdf_parse_for_train.py +++ b/magic_pdf/pdf_parse_for_train.py @@ -531,6 +531,7 @@ def parse_pdf_for_train( page_info["bak_page_no_bboxes"] = page_no_bboxs page_info["bak_header_bboxes"] = header_bboxs page_info["bak_footer_bboxes"] = footer_bboxs + page_info["bak_footer_note_bboxes"] = footnote_bboxes_tmp pdf_info_dict[f"page_{page_id}"] = page_info diff --git a/magic_pdf/train_utils/convert_to_train_format.py b/magic_pdf/train_utils/convert_to_train_format.py index 166ec0f2..be359c79 100644 --- a/magic_pdf/train_utils/convert_to_train_format.py +++ b/magic_pdf/train_utils/convert_to_train_format.py @@ -54,7 +54,7 @@ def convert_to_train_format(jso: dict) -> []: n_bbox = {"category_id": 10, "bbox": inter_equation["bbox"]} bboxes.append(n_bbox) - for footnote in v['footnote_bboxes_tmp']: + for footnote in v['bak_footer_note_bboxes']: n_bbox = {"category_id": 5, "bbox": footnote["bbox"]} bboxes.append(n_bbox)