Merge pull request #4304 from opendatalab/dev

Dev
2026-03-27 11:08:32 +07:00 · 2026-01-06 14:46:18 +08:00
parent 33543b76c9 0028514ced
commit b69191ba2b
3 changed files with 12 additions and 2 deletions
--- a/README.md
+++ b/README.md
@@ -45,6 +45,11 @@

 # Changelog

+- 2026/01/06 2.7.1 Release
+  - Fixed bug #4300
+  - Updated pdfminer.six dependency version to resolve [CVE-2025-64512](https://github.com/advisories/GHSA-wf5f-4jwr-ppcp)
+  - Support automatic correction of input image EXIF orientation to improve OCR recognition accuracy #4283
+
 - 2025/12/30 2.7.0 Release
  - Simplified installation process. No need to separately install `vlm` acceleration engine dependencies. Using `uv pip install mineru[all]` during installation will install all optional backend dependencies.
  - Added new `hybrid` backend, which combines the advantages of `pipeline` and `vlm` backends. Built on vlm, it integrates some capabilities of pipeline, adding extra extensibility on top of high accuracy:
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -45,6 +45,11 @@

 # 更新记录

+- 2026/01/06 2.7.1 发布
+  - fix bug: #4300
+  - 更新pdfminer.six的依赖版本以解决 [CVE-2025-64512](https://github.com/advisories/GHSA-wf5f-4jwr-ppcp)
+  - 支持输入图像的EXIF方向自动校正，提升OCR识别效果 #4283
+
 - 2025/12/30 2.7.0 发布
  - 简化安装流程，现在不再需要单独安装`vlm`加速引擎依赖包，安装时使用`uv pip install mineru[all]`即可安装所有可选后端的依赖包。
  - 增加全新后端`hybrid`，该后端结合了`pipeline`和`vlm`后端的优势，在vlm的基础上，融入了pipeline的部分能力，在高精度的基础上增加了额外的扩展性：
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
    "click>=8.1.7",
    "loguru>=0.7.2",
    "numpy>=1.21.6",
-    "pdfminer.six==20251230",
+    "pdfminer.six>=20251230",
    "tqdm>=4.67.1",
    "requests",
    "httpx",
@@ -94,10 +94,10 @@ core = [
    "mineru[pipeline]",
    "mineru[api]",
    "mineru[gradio]",
-    "mineru[mlx] ; sys_platform == 'darwin'",
 ]
 all = [
    "mineru[core]",
+    "mineru[mlx] ; sys_platform == 'darwin'",
    "mineru[vllm] ; sys_platform == 'linux'",
    "mineru[lmdeploy] ; sys_platform == 'windows'",
 ]