diff --git a/mineru/model/table/rec/unet_table/main.py b/mineru/model/table/rec/unet_table/main.py index 7bf42d26..a6c7bb6a 100644 --- a/mineru/model/table/rec/unet_table/main.py +++ b/mineru/model/table/rec/unet_table/main.py @@ -94,7 +94,7 @@ class WiredTableRecognition: t_rec_ocr_list = self.transform_res(cell_box_det_map, polygons, logi_points) # 将每个单元格中的ocr识别结果排序和同行合并,输出的html能完整保留文字的换行格式 t_rec_ocr_list = self.sort_and_gather_ocr_res(t_rec_ocr_list) - # cell_box_map = + logi_points = [t_box_ocr["t_logic_box"] for t_box_ocr in t_rec_ocr_list] cell_box_det_map = { i: [ocr_box_and_text[1] for ocr_box_and_text in t_box_ocr["t_ocr_res"]] diff --git a/mineru/model/table/rec/unet_table/utils_table_recover.py b/mineru/model/table/rec/unet_table/utils_table_recover.py index 9ad00040..e803f300 100644 --- a/mineru/model/table/rec/unet_table/utils_table_recover.py +++ b/mineru/model/table/rec/unet_table/utils_table_recover.py @@ -292,7 +292,8 @@ def plot_html_table( continue if row == row_start and col == col_start: ocr_rec_text = cell_box_map.get(i) - text = "
".join(ocr_rec_text) + # text = "
".join(ocr_rec_text) + text = "".join(ocr_rec_text) # 如果是起始单元格 row_span = row_end - row_start + 1 col_span = col_end - col_start + 1