refactor: add MINERU_HYBRID_FORCE_PIPELINE_ENABLE environment variable check to aio_doc_analyze (matching doc_analyze) to control pipeline enablement in OCR processing

2026-03-27 11:08:32 +07:00 · 2025-12-23 18:12:12 +08:00
parent 6cecafd99d
commit e2a06bbb0a
1 changed file with 13 additions and 2 deletions
--- a/mineru/backend/hybrid/hybrid_analyze.py
+++ b/mineru/backend/hybrid/hybrid_analyze.py
@@ -389,7 +389,12 @@ def doc_analyze(
    _ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method)
    _vlm_ocr_enable = False
    _force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes")
-    if _ocr_enable and language in ["ch", "en"] and inline_formula_enable and not _force_pipeline_enable:
+    if (
+            _ocr_enable
+            and language in ["ch", "en"]
+            and inline_formula_enable
+            and not _force_pipeline_enable
+    ):
        _vlm_ocr_enable = True
        results = predictor.batch_two_step_extract(images=images_pil_list)
    else:
@@ -451,7 +456,13 @@ async def aio_doc_analyze(

    _ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method)
    _vlm_ocr_enable = False
-    if _ocr_enable and language in ["ch", "en"] and inline_formula_enable:
+    _force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes")
+    if (
+            _ocr_enable
+            and language in ["ch", "en"]
+            and inline_formula_enable
+            and not _force_pipeline_enable
+    ):
        _vlm_ocr_enable = True
        results = await predictor.aio_batch_two_step_extract(images=images_pil_list)
    else: