diff --git a/README.md b/README.md index 5ac984b8..1a647240 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ # Changelog -- 2025/07/22 2.1.2 Released +- 2025/07/22 2.1.3 Released - Bug Fixes - Fixed the issue of excessive memory consumption during the `MFR` step in the `pipeline` backend under certain scenarios #2771 - Fixed the inaccurate matching between `image`/`table` and `caption`/`footnote` under certain conditions #3129 diff --git a/README_zh-CN.md b/README_zh-CN.md index 5e7a43ee..183e15e9 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -43,7 +43,7 @@ # 更新记录 -- 2025/07/22 2.1.2发布 +- 2025/07/22 2.1.3发布 - bug修复 - 修复`pipeline`后端中`MFR`步骤在某些情况下显存消耗过大的问题 #2771 - 修复某些情况下`image`/`table`与`caption`/`footnote`匹配不准确的问题 #3129 diff --git a/mineru/model/mfr/unimernet/Unimernet.py b/mineru/model/mfr/unimernet/Unimernet.py index d7c6789c..8d1b95f6 100644 --- a/mineru/model/mfr/unimernet/Unimernet.py +++ b/mineru/model/mfr/unimernet/Unimernet.py @@ -104,6 +104,10 @@ class UnimernetModel(object): # Create dataset with sorted images dataset = MathDataset(sorted_images, transform=self.model.transform) + + # Cap batch_size at the largest power of two that does not exceed len(sorted_images) + batch_size = min(batch_size, 2 ** (len(sorted_images).bit_length() - 1)) + dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0) # Process batches and store results