mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
footnote drop逻辑更新
This commit is contained in:
@@ -44,10 +44,15 @@ def remove_spans_by_bboxes_dict(spans, need_remove_spans_bboxes_dict):
|
||||
# logger.info(f"remove spans by bbox dict, drop_tag: {drop_tag}, removed_bboxes: {removed_bboxes}")
|
||||
need_remove_spans = []
|
||||
for span in spans:
|
||||
# Decide whether to drop this span: it is dropped when its overlap ratio with any bbox in removed_bboxes exceeds 0.5
|
||||
for removed_bbox in removed_bboxes:
|
||||
if calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], removed_bbox) > 0.5:
|
||||
need_remove_spans.append(span)
|
||||
break
|
||||
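# When drop_tag is DropTag.FOOTNOTE, also drop the span if its vertical center lies below any bbox in removed_bboxes and its horizontal center falls within that bbox's x-range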
|
||||
elif drop_tag == DropTag.FOOTNOTE and (span['bbox'][1]+span['bbox'][3])/2 > removed_bbox[3] and removed_bbox[0] < (span['bbox'][0]+span['bbox'][2])/2 < removed_bbox[2]:
|
||||
need_remove_spans.append(span)
|
||||
break
|
||||
|
||||
for span in need_remove_spans:
|
||||
spans.remove(span)
|
||||
|
||||
Reference in New Issue
Block a user