From 1cd85ccfaeddeedbba2cea67d818295fa3089023 Mon Sep 17 00:00:00 2001 From: myhloli Date: Wed, 13 Aug 2025 17:43:52 +0800 Subject: [PATCH] fix: update text formatting in table recovery logic for improved output consistency --- mineru/model/table/rec/unet_table/main.py | 2 +- mineru/model/table/rec/unet_table/utils_table_recover.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mineru/model/table/rec/unet_table/main.py b/mineru/model/table/rec/unet_table/main.py index 7bf42d26..a6c7bb6a 100644 --- a/mineru/model/table/rec/unet_table/main.py +++ b/mineru/model/table/rec/unet_table/main.py @@ -94,7 +94,7 @@ class WiredTableRecognition: t_rec_ocr_list = self.transform_res(cell_box_det_map, polygons, logi_points) # 将每个单元格中的ocr识别结果排序和同行合并,输出的html能完整保留文字的换行格式 t_rec_ocr_list = self.sort_and_gather_ocr_res(t_rec_ocr_list) - # cell_box_map = + logi_points = [t_box_ocr["t_logic_box"] for t_box_ocr in t_rec_ocr_list] cell_box_det_map = { i: [ocr_box_and_text[1] for ocr_box_and_text in t_box_ocr["t_ocr_res"]] diff --git a/mineru/model/table/rec/unet_table/utils_table_recover.py b/mineru/model/table/rec/unet_table/utils_table_recover.py index 9ad00040..e803f300 100644 --- a/mineru/model/table/rec/unet_table/utils_table_recover.py +++ b/mineru/model/table/rec/unet_table/utils_table_recover.py @@ -292,7 +292,8 @@ def plot_html_table( continue if row == row_start and col == col_start: ocr_rec_text = cell_box_map.get(i) - text = "<br>".join(ocr_rec_text) + # text = "<br>".join(ocr_rec_text) + text = "".join(ocr_rec_text) # 如果是起始单元格 row_span = row_end - row_start + 1 col_span = col_end - col_start + 1