fix: update vllm engine configuration for corex device type in vlm_analyze.py

This commit is contained in:
myhloli
2026-01-30 02:50:41 +08:00
parent 56fca04b22
commit b9d2b3de09

View File

@@ -148,20 +148,27 @@ class ModelSingleton:
         except ImportError:
             raise ImportError("Please install vllm to use the vllm-async-engine backend.")
-        # musa vllm v1 引擎特殊配置
-        device = get_device()
-        if device.startswith("musa"):
-            import torch
-            if torch.musa.is_available():
-                compilation_config = CompilationConfig(
-                    cudagraph_capture_sizes=[1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
-                    simple_cuda_graph=True
-                )
-                block_size = 32
-                kwargs["compilation_config"] = compilation_config
-                kwargs["block_size"] = block_size
+        """
+        # musa vllm v1 引擎特殊配置
+        # device = get_device()
+        # if device_type.startswith("musa"):
+        #     import torch
+        #     if torch.musa.is_available():
+        #         compilation_config = {
+        #             "cudagraph_capture_sizes": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30],
+        #             "simple_cuda_graph": True
+        #         }
+        #         block_size = 32
+        #         kwargs["compilation_config"] = compilation_config
+        #         kwargs["block_size"] = block_size
+        """
+        # corex vllm v1 引擎特殊配置
+        device_type = os.getenv("MINERU_LMDEPLOY_DEVICE", "")
+        if device_type.lower() == "corex":
+            compilation_config = {
+                "cudagraph_mode": "FULL_DECODE_ONLY",
+                "level": 0
+            }
+            kwargs["compilation_config"] = compilation_config
         if "compilation_config" in kwargs:
             if isinstance(kwargs["compilation_config"], dict):