diff --git a/magic_pdf/model/magic_model.py b/magic_pdf/model/magic_model.py index d5b63dbe..0f4be48e 100644 --- a/magic_pdf/model/magic_model.py +++ b/magic_pdf/model/magic_model.py @@ -553,6 +553,42 @@ class MagicModel: } ) + for i in range(len(objects)): + j = i + OBJ_IDX_OFFSET + if j in seen_idx: + continue + seen_idx.add(j) + nearest_dis, nearest_sub_idx = float('inf'), -1 + for k in range(len(subjects)): + dis = bbox_distance(objects[i]['bbox'], subjects[k]['bbox']) + if dis < nearest_dis: + nearest_dis = dis + nearest_sub_idx = k + + for k in range(len(subjects)): + if k != nearest_sub_idx: continue + if k in seen_sub_idx: + for kk in range(len(ret)): + if ret[kk]['sub_idx'] == k: + ret[kk]['obj_bboxes'].append({'score': objects[i]['score'], 'bbox': objects[i]['bbox']}) + break + else: + ret.append( + { + 'sub_bbox': { + 'bbox': subjects[k]['bbox'], + 'score': subjects[k]['score'], + }, + 'obj_bboxes': [ + {'score': objects[i]['score'], 'bbox': objects[i]['bbox']} + ], + 'sub_idx': k, + } + ) + seen_sub_idx.add(k) + seen_idx.add(k) + + for i in range(len(subjects)): if i in seen_sub_idx: continue