mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
Merge pull request #1062 from opendatalab/dev
fix(table): add null check for OCR result in rapid table prediction
This commit is contained in:
@@ -163,7 +163,9 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
|
||||
page_width = img_dict["width"]
|
||||
page_height = img_dict["height"]
|
||||
if start_page_id <= index <= end_page_id:
|
||||
page_start = time.time()
|
||||
result = custom_model(img)
|
||||
logger.info(f'-----page_id : {index}, page total time: {round(time.time() - page_start, 2)}-----')
|
||||
else:
|
||||
result = []
|
||||
page_info = {"page_no": index, "height": page_height, "width": page_width}
|
||||
|
||||
@@ -170,7 +170,6 @@ class CustomPEKModel:
|
||||
logger.info('DocAnalysis init done!')
|
||||
|
||||
def __call__(self, image):
|
||||
page_start = time.time()
|
||||
|
||||
# layout检测
|
||||
layout_start = time.time()
|
||||
@@ -272,6 +271,4 @@ class CustomPEKModel:
|
||||
)
|
||||
logger.info(f'table time: {round(time.time() - table_start, 2)}')
|
||||
|
||||
logger.info(f'-----page total time: {round(time.time() - page_start, 2)}-----')
|
||||
|
||||
return layout_res
|
||||
|
||||
@@ -10,5 +10,7 @@ class RapidTableModel(object):
|
||||
|
||||
def predict(self, image):
    """Run OCR on ``image`` and feed the result to the table model.

    Args:
        image: Any object accepted by ``np.asarray`` (presumably a PIL
            image or an ndarray -- confirm against callers).

    Returns:
        A 3-tuple ``(html_code, table_cell_bboxes, elapse)`` as produced by
        ``self.table_model``, or ``(None, None, None)`` when the OCR engine
        returns ``None`` as its result.
    """
    # Convert once and reuse the same array for both stages instead of
    # calling np.asarray(image) separately for OCR and table recognition.
    image_array = np.asarray(image)

    ocr_result, _ = self.ocr_engine(image_array)
    if ocr_result is None:
        # Nothing was recognized; skip the table model, which needs OCR output.
        return None, None, None

    html_code, table_cell_bboxes, elapse = self.table_model(image_array, ocr_result)
    return html_code, table_cell_bboxes, elapse
|
||||
Reference in New Issue
Block a user