refactor: add environment variable check to control pipeline enablement in OCR processing

This commit is contained in:
myhloli
2025-12-23 18:12:12 +08:00
parent 6cecafd99d
commit e2a06bbb0a

View File

@@ -389,7 +389,12 @@ def doc_analyze(
_ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method)
_vlm_ocr_enable = False
_force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes")
if _ocr_enable and language in ["ch", "en"] and inline_formula_enable and not _force_pipeline_enable:
if (
_ocr_enable
and language in ["ch", "en"]
and inline_formula_enable
and not _force_pipeline_enable
):
_vlm_ocr_enable = True
results = predictor.batch_two_step_extract(images=images_pil_list)
else:
@@ -451,7 +456,13 @@ async def aio_doc_analyze(
_ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method)
_vlm_ocr_enable = False
if _ocr_enable and language in ["ch", "en"] and inline_formula_enable:
_force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes")
if (
_ocr_enable
and language in ["ch", "en"]
and inline_formula_enable
and not _force_pipeline_enable
):
_vlm_ocr_enable = True
results = await predictor.aio_batch_two_step_extract(images=images_pil_list)
else: