From 1d1c7ba9ab6431261c855e5152980d79c7d186e4 Mon Sep 17 00:00:00 2001 From: myhloli Date: Tue, 22 Apr 2025 18:55:10 +0800 Subject: [PATCH 1/4] refactor(table): replace ocr_engine with lang in table model prediction - Remove OCR engine instantiation inside the loop - Pass language directly to the table model instead of OCR engine - Simplify code structure and improve readability --- magic_pdf/model/batch_analyze.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/magic_pdf/model/batch_analyze.py b/magic_pdf/model/batch_analyze.py index 281ef8aa..bd2365d4 100644 --- a/magic_pdf/model/batch_analyze.py +++ b/magic_pdf/model/batch_analyze.py @@ -161,20 +161,13 @@ class BatchAnalyze: for table_res_dict in tqdm(table_res_list_all_page, desc="Table Predict"): _lang = table_res_dict['lang'] atom_model_manager = AtomModelSingleton() - ocr_engine = atom_model_manager.get_atom_model( - atom_model_name='ocr', - ocr_show_log=False, - det_db_box_thresh=0.5, - det_db_unclip_ratio=1.6, - lang=_lang - ) table_model = atom_model_manager.get_atom_model( atom_model_name='table', table_model_name='rapid_table', table_model_path='', table_max_time=400, device='cpu', - ocr_engine=ocr_engine, + lang=_lang, table_sub_model_name='slanet_plus' ) html_code, table_cell_bboxes, logic_points, elapse = table_model.predict(table_res_dict['table_img']) From 69cdea908d19a02baa6d252329f3639b5dc20473 Mon Sep 17 00:00:00 2001 From: myhloli Date: Tue, 22 Apr 2025 19:12:35 +0800 Subject: [PATCH 2/4] fix(ocr): switch to ch_lite model for Chinese OCR on CPU - Automatically change to ch_lite model when using CPU for Chinese OCR - This modification improves performance on CPU devices --- .../sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py index a0505593..364b4c6d 100644 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py @@ -53,6 +53,11 @@ class PytorchPaddleOCR(TextSystem): args = parser.parse_args(args) self.lang = kwargs.get('lang', 'ch') + + device = get_device() + if device == 'cpu' and self.lang == 'ch': + self.lang = 'ch_lite' + if self.lang in latin_lang: self.lang = 'latin' elif self.lang in arabic_lang: From 8d9070db101025e5a2644d24bdcf83270ad7829e Mon Sep 17 00:00:00 2001 From: myhloli Date: Tue, 22 Apr 2025 19:15:01 +0800 Subject: [PATCH 3/4] fix(lang|performance): resolve lang parameter issue and speed up OCR/table parsing - Fix lang parameter ineffectiveness during table parsing model initialization - Resolve significant slowdown in OCR and table parsing speed in CPU mode - Update changelog in README.md and README_zh-CN.md --- README.md | 3 +++ README_zh-CN.md | 3 +++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 1d02b064..e195dd21 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,9 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte # Changelog +- 2025/04/16 1.3.7 Released + - Fixed the issue where the `lang` parameter was ineffective during table parsing model initialization. + - Fixed the significant slowdown in OCR and table parsing speed in `cpu` mode. - 2025/04/16 1.3.4 Released - Slightly improved the speed of OCR detection by removing some unused blocks. - Fixed page-level sorting errors caused by footnotes in certain cases. diff --git a/README_zh-CN.md b/README_zh-CN.md index 73b749f2..ce8ed24b 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -47,6 +47,9 @@ # 更新记录 +- 2025/04/22 1.3.7 发布 + - 修复表格解析模型初始化时lang参数失效的问题 + - 修复在`cpu`模式下ocr和表格解析速度大幅下降的问题 - 2025/04/16 1.3.4 发布 - 通过移除一些无用的块,小幅提升了ocr-det的速度 - 修复部分情况下由footnote导致的页面内排序错误 From 9c4e779b919b0c8bbb7f4d75a545aa3f99a8ba67 Mon Sep 17 00:00:00 2001 From: myhloli Date: Tue, 22 Apr 2025 19:15:29 +0800 Subject: [PATCH 4/4] fix(lang|performance): resolve lang parameter issue and speed up OCR/table parsing - Fix lang parameter ineffectiveness during table parsing model initialization - Resolve significant slowdown in OCR and table parsing speed in CPU mode - Update changelog in README.md and README_zh-CN.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e195dd21..fa942541 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte # Changelog -- 2025/04/16 1.3.7 Released +- 2025/04/22 1.3.7 Released - Fixed the issue where the `lang` parameter was ineffective during table parsing model initialization. - Fixed the significant slowdown in OCR and table parsing speed in `cpu` mode. - 2025/04/16 1.3.4 Released