Mirror of https://github.com/opendatalab/MinerU.git (synced 2026-03-27 02:58:54 +07:00)
refactor: add MINERU_HYBRID_FORCE_PIPELINE_ENABLE environment variable check to control pipeline enablement in OCR processing
This commit is contained in:
@@ -389,7 +389,12 @@ def doc_analyze(
     _ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method)
     _vlm_ocr_enable = False
     _force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes")
-    if _ocr_enable and language in ["ch", "en"] and inline_formula_enable and not _force_pipeline_enable:
+    if (
+        _ocr_enable
+        and language in ["ch", "en"]
+        and inline_formula_enable
+        and not _force_pipeline_enable
+    ):
         _vlm_ocr_enable = True
         results = predictor.batch_two_step_extract(images=images_pil_list)
     else:
@@ -451,7 +456,13 @@ async def aio_doc_analyze(
 
     _ocr_enable = ocr_classify(pdf_bytes, parse_method=parse_method)
     _vlm_ocr_enable = False
-    if _ocr_enable and language in ["ch", "en"] and inline_formula_enable:
+    _force_pipeline_enable = os.getenv("MINERU_HYBRID_FORCE_PIPELINE_ENABLE", "0").lower() in ("1", "true", "yes")
+    if (
+        _ocr_enable
+        and language in ["ch", "en"]
+        and inline_formula_enable
+        and not _force_pipeline_enable
+    ):
         _vlm_ocr_enable = True
         results = await predictor.aio_batch_two_step_extract(images=images_pil_list)
     else:
Reference in New Issue
Block a user