From e2a06bbb0a7ff0850c0e3eb0f558da1afec4d8db Mon Sep 17 00:00:00 2001 From: myhloli Date: Tue, 23 Dec 2025 18:12:12 +0800 Subject: [PATCH] refactor: add environment variable check to control pipeline enablement in OCR processing --- mineru/backend/hybrid/hybrid_analyze.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mineru/backend/hybrid/hybrid_analyze.py b/mineru/backend/hybrid/hybrid_analyze.py index 8f690e55..ee4d4dae 100644 --- a/mineru/backend/hybrid/hybrid_analyze.py +++ b/mineru/backend/hybrid/hybrid_analyze.py @@ -389,7 +389,12 @@ def doc_analyze( _ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method) _vlm_ocr_enable = False _force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes") - if _ocr_enable and language in ["ch", "en"] and inline_formula_enable and not _force_pipeline_enable: + if ( + _ocr_enable + and language in ["ch", "en"] + and inline_formula_enable + and not _force_pipeline_enable + ): _vlm_ocr_enable = True results = predictor.batch_two_step_extract(images=images_pil_list) else: @@ -451,7 +456,13 @@ async def aio_doc_analyze( _ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method) _vlm_ocr_enable = False - if _ocr_enable and language in ["ch", "en"] and inline_formula_enable: + _force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes") + if ( + _ocr_enable + and language in ["ch", "en"] + and inline_formula_enable + and not _force_pipeline_enable + ): _vlm_ocr_enable = True results = await predictor.aio_batch_two_step_extract(images=images_pil_list) else: