mirror of
https://github.com/ollama/ollama.git
synced 2026-03-27 02:58:43 +07:00
gptoss: fix memory calc (#11700)
This commit is contained in:
@@ -676,7 +676,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
|
||||
}
|
||||
}
|
||||
fullOffload = 4 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6
|
||||
- partialOffload = 2 * fullOffload
|
||||
+ partialOffload = fullOffload
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user