From fb2d6ade0bf6b1faebc7c54c4b41b4f30bd5a24d Mon Sep 17 00:00:00 2001 From: myhloli Date: Mon, 26 Jan 2026 19:12:32 +0800 Subject: [PATCH] fix: enhance subject-object matching logic with object block type handling --- mineru/backend/hybrid/hybrid_magic_model.py | 3 +- mineru/backend/vlm/vlm_magic_model.py | 3 +- mineru/utils/magic_model_utils.py | 48 ++++++++++----------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/mineru/backend/hybrid/hybrid_magic_model.py b/mineru/backend/hybrid/hybrid_magic_model.py index 2aab61bc..fd27edbb 100644 --- a/mineru/backend/hybrid/hybrid_magic_model.py +++ b/mineru/backend/hybrid/hybrid_magic_model.py @@ -424,7 +424,8 @@ def __tie_up_category_by_index(blocks, subject_block_type, object_block_type): # 调用通用方法 return tie_up_category_by_index( get_subjects, - get_objects + get_objects, + object_block_type=object_block_type ) diff --git a/mineru/backend/vlm/vlm_magic_model.py b/mineru/backend/vlm/vlm_magic_model.py index a4a17ada..ee96f569 100644 --- a/mineru/backend/vlm/vlm_magic_model.py +++ b/mineru/backend/vlm/vlm_magic_model.py @@ -349,7 +349,8 @@ def __tie_up_category_by_index(blocks, subject_block_type, object_block_type): # 调用通用方法 return tie_up_category_by_index( get_subjects, - get_objects + get_objects, + object_block_type=object_block_type ) diff --git a/mineru/utils/magic_model_utils.py b/mineru/utils/magic_model_utils.py index 4c8778dd..3742e8a3 100644 --- a/mineru/utils/magic_model_utils.py +++ b/mineru/utils/magic_model_utils.py @@ -174,7 +174,8 @@ def tie_up_category_by_index( get_subjects_func: Callable, get_objects_func: Callable, extract_subject_func: Callable = None, - extract_object_func: Callable = None + extract_object_func: Callable = None, + object_block_type: str = "object", ): """ 基于index的类别关联方法,用于将主体对象与客体对象进行关联 @@ -256,34 +257,33 @@ def tie_up_category_by_index( elif index_diff == min_index_diff: best_subject_indices.append(i) - # 如果有多个主体的index差值相同,先使用边缘距离,再使用中心点距离作为tiebreaker + # 如果有多个主体的index差值相同(最多两个),根据边缘距离进行筛选 if len(best_subject_indices) > 1: - min_edge_dist = float("inf") - best_edge_indices = [] + # 计算所有候选主体的边缘距离 + edge_distances = [(idx, bbox_distance(obj["bbox"], subjects[idx]["bbox"])) for idx in best_subject_indices] + edge_dist_diff = abs(edge_distances[0][1] - edge_distances[1][1]) - # 第一步:找出边缘距离最小的所有主体 - for idx in best_subject_indices: - edge_dist = bbox_distance(obj["bbox"], subjects[idx]["bbox"]) + for idx, edge_dist in edge_distances: logger.debug(f"Obj index: {obj_index}, Sub index: {subjects[idx]['index']}, Edge distance: {edge_dist}") - if edge_dist < min_edge_dist: - min_edge_dist = edge_dist - best_edge_indices = [idx] - elif edge_dist == min_edge_dist: - best_edge_indices.append(idx) - # 第二步:如果边缘距离也相同,使用中心点距离作为最终tiebreaker - if len(best_edge_indices) > 1: - min_center_dist = float("inf") - best_subject_idx = best_edge_indices[0] - - for idx in best_edge_indices: - center_dist = bbox_center_distance(obj["bbox"], subjects[idx]["bbox"]) - logger.debug(f"Obj index: {obj_index}, Sub index: {subjects[idx]['index']}, Center distance: {center_dist}") - if center_dist < min_center_dist: - min_center_dist = center_dist - best_subject_idx = idx + if edge_dist_diff > 2: + # 边缘距离差值大于2,匹配边缘距离更小的主体 + best_subject_idx = min(edge_distances, key=lambda x: x[1])[0] + logger.debug(f"Obj index: {obj_index}, edge_dist_diff > 2, matching to subject with min edge distance, index: {subjects[best_subject_idx]['index']}") + elif object_block_type == "table_caption": + # 边缘距离差值<=2且为table_caption,匹配index更大的主体 + best_subject_idx = max(best_subject_indices, key=lambda idx: subjects[idx]["index"]) + logger.debug(f"Obj index: {obj_index}, edge_dist_diff <= 2 and table_caption, matching to later subject with index: {subjects[best_subject_idx]['index']}") + elif object_block_type.endswith("footnote"): + # 边缘距离差值<=2且为footnote,匹配index更小的主体 + best_subject_idx = min(best_subject_indices, key=lambda idx: subjects[idx]["index"]) + logger.debug(f"Obj index: {obj_index}, edge_dist_diff <= 2 and footnote, matching to earlier subject with index: {subjects[best_subject_idx]['index']}") else: - best_subject_idx = best_edge_indices[0] + # 边缘距离差值<=2 且不适用特殊匹配规则,使用中心点距离匹配 + center_distances = [(idx, bbox_center_distance(obj["bbox"], subjects[idx]["bbox"])) for idx in best_subject_indices] + for idx, center_dist in center_distances: + logger.debug(f"Obj index: {obj_index}, Sub index: {subjects[idx]['index']}, Center distance: {center_dist}") + best_subject_idx = min(center_distances, key=lambda x: x[1])[0] else: best_subject_idx = best_subject_indices[0]