Merge pull request #1062 from opendatalab/dev

fix(table): add null check for OCR result in rapid table prediction
This commit is contained in:
Xiaomeng Zhao
2024-11-22 17:36:18 +08:00
committed by GitHub
3 changed files with 4 additions and 3 deletions

View File

@@ -163,7 +163,9 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
page_width = img_dict["width"]
page_height = img_dict["height"]
if start_page_id <= index <= end_page_id:
page_start = time.time()
result = custom_model(img)
logger.info(f'-----page_id : {index}, page total time: {round(time.time() - page_start, 2)}-----')
else:
result = []
page_info = {"page_no": index, "height": page_height, "width": page_width}

View File

@@ -170,7 +170,6 @@ class CustomPEKModel:
logger.info('DocAnalysis init done!')
def __call__(self, image):
page_start = time.time()
# layout检测
layout_start = time.time()
@@ -272,6 +271,4 @@ class CustomPEKModel:
)
logger.info(f'table time: {round(time.time() - table_start, 2)}')
logger.info(f'-----page total time: {round(time.time() - page_start, 2)}-----')
return layout_res

View File

@@ -10,5 +10,7 @@ class RapidTableModel(object):
def predict(self, image):
    """Run OCR on *image* and pass the OCR output to the table model.

    Args:
        image: Any object accepted by ``np.asarray`` (presumably a PIL
            image or an ndarray -- confirm against the callers).

    Returns:
        A 3-tuple ``(html_code, table_cell_bboxes, elapse)`` exactly as
        produced by ``self.table_model``, or ``(None, None, None)`` when
        the OCR engine returns ``None`` as its result.
    """
    # Convert once and reuse the same array for both engines instead of
    # repeating the conversion for each call.
    image_array = np.asarray(image)

    ocr_result, _ = self.ocr_engine(image_array)
    if ocr_result is None:
        # No OCR result to feed the table model; callers receive an
        # all-None triple and must handle it.
        return None, None, None

    html_code, table_cell_bboxes, elapse = self.table_model(image_array, ocr_result)
    return html_code, table_cell_bboxes, elapse