perf(inference): adjust batch ratio for high GPU memory

- Increase batch ratio to 8 when GPU memory is >= 16 GB
- Improves inference performance on systems with larger GPU memory
This commit is contained in:
myhloli
2025-03-03 17:30:02 +08:00
parent f20ab37dbd
commit 0b05dff74f

View File

@@ -170,7 +170,9 @@ def doc_analyze(
gpu_memory = int(os.getenv("VIRTUAL_VRAM_SIZE", round(get_vram(device))))
if gpu_memory is not None and gpu_memory >= 8:
if gpu_memory >= 10:
if gpu_memory >= 16:
batch_ratio = 8
elif gpu_memory >= 10:
batch_ratio = 4
else:
batch_ratio = 2