Merge pull request #1062 from opendatalab/dev

fix(table): add null check for OCR result in rapid table prediction
2026-03-27 11:08:32 +07:00 · 2024-11-22 17:36:18 +08:00
parent 958168b308 241d4895b7
commit 809bf4793a
3 changed files with 4 additions and 3 deletions
--- a/magic_pdf/model/doc_analyze_by_custom_model.py
+++ b/magic_pdf/model/doc_analyze_by_custom_model.py
@@ -163,7 +163,9 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
        page_width = img_dict["width"]
        page_height = img_dict["height"]
        if start_page_id <= index <= end_page_id:
+            page_start = time.time()
            result = custom_model(img)
+            logger.info(f'-----page_id : {index}, page total time: {round(time.time() - page_start, 2)}-----')
        else:
            result = []
        page_info = {"page_no": index, "height": page_height, "width": page_width}
--- a/magic_pdf/model/pdf_extract_kit.py
+++ b/magic_pdf/model/pdf_extract_kit.py
@@ -170,7 +170,6 @@ class CustomPEKModel:
        logger.info('DocAnalysis init done!')

    def __call__(self, image):
-        page_start = time.time()

        # layout检测
        layout_start = time.time()
@@ -272,6 +271,4 @@ class CustomPEKModel:
                    )
            logger.info(f'table time: {round(time.time() - table_start, 2)}')

-        logger.info(f'-----page total time: {round(time.time() - page_start, 2)}-----')
-
        return layout_res
--- a/magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py
+++ b/magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py
@@ -10,5 +10,7 @@ class RapidTableModel(object):

    def predict(self, image):
        ocr_result, _ = self.ocr_engine(np.asarray(image))
+        if ocr_result is None:
+            return None, None, None
        html_code, table_cell_bboxes, elapse = self.table_model(np.asarray(image), ocr_result)
        return html_code, table_cell_bboxes, elapse