fix: add lmdeploy-engine parameters to compose.yaml for improved multi-GPU support

2026-03-27 11:08:32 +07:00 · 2025-11-14 10:34:29 +08:00
parent 056f8af0ae
commit d67be0c7de
1 changed files with 9 additions and 1 deletions
--- a/docker/compose.yaml
+++ b/docker/compose.yaml
@@ -73,6 +73,9 @@ services:
      # parameters for vllm-engine
      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # parameters for lmdeploy-engine
+      # --dp 2  # If using multiple GPUs, increase throughput using lmdeploy's multi-GPU parallel mode
+      # --cache-max-entry-count 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size with this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
    ulimits:
      memlock: -1
      stack: 67108864
@@ -98,12 +101,17 @@ services:
    command:
      --server-name 0.0.0.0
      --server-port 7860
-      --enable-vllm-engine true  # Enable the vllm engine for Gradio
+      --enable-vllm-engine true  # Enable the vllm engine for Gradio (keep active flags above the first full-line comment: a comment line ends the YAML scalar)
      # --enable-api false  # If you want to disable the API, set this to false
      # --max-convert-pages 20  # If you want to limit the number of pages for conversion, set this to a specific number
      # parameters for vllm-engine
      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below.
+      # parameters for lmdeploy-engine
+      # !!!The lmdeploy and vLLM engines cannot be enabled simultaneously; enable at most one engine at a time. To switch engines, replace `--enable-vllm-engine true` above with `--enable-lmdeploy-engine true`.!!!
+      # --enable-lmdeploy-engine true  # Enable the lmdeploy engine for Gradio
+      # --dp 2  # If using multiple GPUs, increase throughput using lmdeploy's multi-GPU parallel mode
+      # --cache-max-entry-count 0.5  # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size with this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
    ulimits:
      memlock: -1
      stack: 67108864