refactor: expand OCR text conditions for category assignment in analysis scripts

This commit is contained in:
myhloli
2025-12-23 18:36:13 +08:00
parent e2a06bbb0a
commit 6d7d1c3b0c
2 changed files with 11 additions and 2 deletions

View File

@@ -290,7 +290,8 @@ def _process_ocr_and_formulas(
if ( if (
ocr_text in [ ocr_text in [
'204号', '20', '2', '2号', '20号', '','204', '204号', '20', '2', '2号', '20号', '','204',
'(cid:)', '(cid:)', '(ci:)', '(cd:1)', 'cd:)', 'c)', '(cd:)', 'c', 'id:)',
':)', '√:)', '√i:)', 'i:)', ':' , 'i:)',
] ]
and ocr_score < 0.8 and ocr_score < 0.8
and layout_res_width < layout_res_height and layout_res_width < layout_res_height

View File

@@ -420,7 +420,15 @@ class BatchAnalyze:
layout_res_item['poly'][4], layout_res_item['poly'][5]] layout_res_item['poly'][4], layout_res_item['poly'][5]]
layout_res_width = layout_res_bbox[2] - layout_res_bbox[0] layout_res_width = layout_res_bbox[2] - layout_res_bbox[0]
layout_res_height = layout_res_bbox[3] - layout_res_bbox[1] layout_res_height = layout_res_bbox[3] - layout_res_bbox[1]
if ocr_text in ['204号', '20', '2', '2号', '20号', '', '204'] and ocr_score < 0.8 and layout_res_width < layout_res_height: if (
ocr_text in [
'204号', '20', '2', '2号', '20号', '', '204',
'(cid:)', '(ci:)', '(cd:1)', 'cd:)', 'c)', '(cd:)', 'c', 'id:)',
':)', '√:)', '√i:)', 'i:)', ':', 'i:)',
]
and ocr_score < 0.8
and layout_res_width < layout_res_height
):
layout_res_item['category_id'] = 16 layout_res_item['category_id'] = 16
total_processed += len(img_crop_list) total_processed += len(img_crop_list)