mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
Compare commits
15 Commits
release-2.
...
release-2.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d30f762ac8 | ||
|
|
f65ff12eea | ||
|
|
8b8ac3e62e | ||
|
|
473154c2b3 | ||
|
|
e2fd491760 | ||
|
|
c29e2d0ca2 | ||
|
|
a5687394d5 | ||
|
|
13819c0596 | ||
|
|
d775f76eec | ||
|
|
5dd73dbcca | ||
|
|
3eda0d10a0 | ||
|
|
e0c3cbb34a | ||
|
|
d2fcdd0fa4 | ||
|
|
af887d63c0 | ||
|
|
a9f28b4436 |
@@ -44,6 +44,12 @@
|
||||
|
||||
# Changelog
|
||||
|
||||
- 2025/09/10 2.2.2 Released
|
||||
- Fixed an issue where a failure to parse some tables with the new table recognition model could disrupt the overall parsing task
|
||||
|
||||
- 2025/09/08 2.2.1 Released
|
||||
- Fixed the issue where some newly added models were not downloaded when using the model download command.
|
||||
|
||||
- 2025/09/05 2.2.0 Released
|
||||
- Major Updates
|
||||
- In this version, we focused on improving table parsing accuracy by introducing a new [wired table recognition model](https://github.com/RapidAI/TableStructureRec) and a brand-new hybrid table structure parsing algorithm, significantly enhancing the table recognition capabilities of the `pipeline` backend.
|
||||
|
||||
@@ -44,6 +44,12 @@
|
||||
|
||||
# 更新记录
|
||||
|
||||
- 2025/09/10 2.2.2 发布
|
||||
- 修复新的表格识别模型在部分表格解析失败时影响整体解析任务的问题
|
||||
|
||||
- 2025/09/08 2.2.1 发布
|
||||
- 修复使用模型下载命令时,部分新增模型未下载的问题
|
||||
|
||||
- 2025/09/05 2.2.0 发布
|
||||
- 主要更新
|
||||
- 在这个版本我们重点提升了表格的解析精度,通过引入新的[有线表识别模型](https://github.com/RapidAI/TableStructureRec)和全新的混合表格结构解析算法,显著提升了`pipeline`后端的表格识别能力。
|
||||
|
||||
@@ -221,7 +221,7 @@ class BatchAnalyze:
|
||||
|
||||
# 表格格式清理
|
||||
for table_res_dict in table_res_list_all_page:
|
||||
html_code = table_res_dict["table_res"].get("html", "")
|
||||
html_code = table_res_dict["table_res"].get("html", "") or ""
|
||||
|
||||
# 检查html_code是否包含'<table>'和'</table>'
|
||||
if "<table>" in html_code and "</table>" in html_code:
|
||||
|
||||
@@ -66,7 +66,10 @@ def download_pipeline_models():
|
||||
ModelPath.unimernet_small,
|
||||
ModelPath.pytorch_paddle,
|
||||
ModelPath.layout_reader,
|
||||
ModelPath.slanet_plus
|
||||
ModelPath.slanet_plus,
|
||||
ModelPath.unet_structure,
|
||||
ModelPath.paddle_table_cls,
|
||||
ModelPath.paddle_orientation_classification,
|
||||
]
|
||||
download_finish_path = ""
|
||||
for model_path in model_paths:
|
||||
|
||||
@@ -337,5 +337,5 @@ class UnetTableModel:
|
||||
|
||||
return html_code
|
||||
except Exception as e:
|
||||
logger.exception(e)
|
||||
return None
|
||||
logger.warning(e)
|
||||
return wireless_html_code
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "2.1.11"
|
||||
__version__ = "2.2.1"
|
||||
|
||||
Reference in New Issue
Block a user