mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
fix: update text formatting in table recovery logic for improved output consistency
This commit is contained in:
@@ -94,7 +94,7 @@ class WiredTableRecognition:
|
||||
t_rec_ocr_list = self.transform_res(cell_box_det_map, polygons, logi_points)
|
||||
# 将每个单元格中的ocr识别结果排序和同行合并,输出的html能完整保留文字的换行格式
|
||||
t_rec_ocr_list = self.sort_and_gather_ocr_res(t_rec_ocr_list)
|
||||
# cell_box_map =
|
||||
|
||||
logi_points = [t_box_ocr["t_logic_box"] for t_box_ocr in t_rec_ocr_list]
|
||||
cell_box_det_map = {
|
||||
i: [ocr_box_and_text[1] for ocr_box_and_text in t_box_ocr["t_ocr_res"]]
|
||||
|
||||
@@ -292,7 +292,8 @@ def plot_html_table(
|
||||
continue
|
||||
if row == row_start and col == col_start:
|
||||
ocr_rec_text = cell_box_map.get(i)
|
||||
text = "<br>".join(ocr_rec_text)
|
||||
# text = "<br>".join(ocr_rec_text)
|
||||
text = "".join(ocr_rec_text)
|
||||
# 如果是起始单元格
|
||||
row_span = row_end - row_start + 1
|
||||
col_span = col_end - col_start + 1
|
||||
|
||||
Reference in New Issue
Block a user