fix: correct hyphen handling based on next line's span case

This commit is contained in:
myhloli
2025-12-27 00:25:16 +08:00
parent b1aefabbea
commit e54e0c3001

View File

@@ -77,17 +77,17 @@ def merge_para_with_text(para_block, formula_enable=True, img_buket_path=''):
and span_type == ContentType.TEXT
and is_hyphen_at_line_end(content)
):
# 如果下一行的第一个span是大写字母开头,删除连字符
# 如果下一行的第一个span是小写字母开头,删除连字符
if (
i+1 < len(para_block['lines'])
and para_block['lines'][i + 1]['spans']
and para_block['lines'][i + 1]['spans'][0]['type'] == ContentType.TEXT
and para_block['lines'][i + 1]['spans'][0]['content']
and para_block['lines'][i + 1]['spans'][0]['content'][0].isupper()
and para_block['lines'][i + 1]['spans'][0]['content'][0].islower()
):
para_text += content
else:
para_text += content[:-1]
else: # 如果没有下一行或者下一行的第一个span不是小写字母开头则保留连字符但不加空格
para_text += content
else: # 西方文本语境下 content间需要空格分隔
para_text += f'{content} '
return para_text