fix: update text formatting in table recovery logic for improved output consistency

This commit is contained in:
myhloli
2025-08-13 17:43:52 +08:00
parent 1e18361273
commit 1cd85ccfae
2 changed files with 3 additions and 2 deletions

View File

@@ -94,7 +94,7 @@ class WiredTableRecognition:
t_rec_ocr_list = self.transform_res(cell_box_det_map, polygons, logi_points)
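# Sort the OCR results inside each cell and merge those on the same line, so that the output HTML can fully preserve the text's line-break formatting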
t_rec_ocr_list = self.sort_and_gather_ocr_res(t_rec_ocr_list)
# cell_box_map =
logi_points = [t_box_ocr["t_logic_box"] for t_box_ocr in t_rec_ocr_list]
cell_box_det_map = {
i: [ocr_box_and_text[1] for ocr_box_and_text in t_box_ocr["t_ocr_res"]]

View File

@@ -292,7 +292,8 @@ def plot_html_table(
continue
if row == row_start and col == col_start:
ocr_rec_text = cell_box_map.get(i)
text = "<br>".join(ocr_rec_text)
# text = "<br>".join(ocr_rec_text)
text = "".join(ocr_rec_text)
# If this is the starting cell
row_span = row_end - row_start + 1
col_span = col_end - col_start + 1