fix: add lmdeploy-engine parameters to compose.yaml for improved multi-GPU support

This commit is contained in:
myhloli
2025-11-14 10:34:29 +08:00
parent 056f8af0ae
commit d67be0c7de

View File

@@ -73,6 +73,9 @@ services:
# parameters for vllm-engine
# --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
# --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, use this parameter to reduce the KV cache size; if VRAM issues persist, try lowering it further to `0.4` or below.
# parameters for lmdeploy-engine
# --dp 2 # If using multiple GPUs, increase throughput using lmdeploy's multi-GPU parallel mode
# --cache-max-entry-count 0.5 # If running on a single GPU and encountering VRAM shortage, use this parameter to reduce the KV cache size; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
@@ -98,12 +101,17 @@ services:
command:
--server-name 0.0.0.0
--server-port 7860
--enable-vllm-engine true # Enable the vllm engine for Gradio
# --enable-api false # If you want to disable the API, set this to false
# --max-convert-pages 20 # If you want to limit the number of pages for conversion, set this to a specific number
# parameters for vllm-engine
--enable-vllm-engine true # Enable the vllm engine for Gradio
# --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
# --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, use this parameter to reduce the KV cache size; if VRAM issues persist, try lowering it further to `0.4` or below.
# parameters for lmdeploy-engine
# !!! The lmdeploy and vLLM engines cannot be enabled simultaneously. Ensure that at most one engine is active at any given time: comment out `--enable-vllm-engine true` before enabling the lmdeploy engine. !!!
# --enable-lmdeploy-engine true # Enable the lmdeploy engine for Gradio
# --dp 2 # If using multiple GPUs, increase throughput using lmdeploy's multi-GPU parallel mode
# --cache-max-entry-count 0.5 # If running on a single GPU and encountering VRAM shortage, use this parameter to reduce the KV cache size; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864