mirror of
https://github.com/ollama/ollama.git
synced 2026-03-27 02:58:43 +07:00
server: fix ollama ps showing configured instead of actual context length
When context length is clamped to the model's trained context length, ollama ps now shows the actual clamped value instead of the originally configured value.
This commit is contained in:
@@ -80,6 +80,7 @@ type LlamaServer interface {
|
||||
GetPort() int
|
||||
GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
|
||||
HasExited() bool
|
||||
ContextLength() int
|
||||
}
|
||||
|
||||
// llmServer is an instance of a runner hosting a single model
|
||||
@@ -1901,6 +1902,10 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *llmServer) ContextLength() int {
|
||||
return s.options.NumCtx
|
||||
}
|
||||
|
||||
func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
|
||||
devices, err := ml.GetDevicesFromRunner(ctx, s)
|
||||
if err != nil {
|
||||
|
||||
@@ -1897,8 +1897,8 @@ func (s *Server) PsHandler(c *gin.Context) {
|
||||
Details: modelDetails,
|
||||
ExpiresAt: v.expiresAt,
|
||||
}
|
||||
if v.Options != nil {
|
||||
mr.ContextLength = v.Options.NumCtx
|
||||
if v.llama != nil {
|
||||
mr.ContextLength = v.llama.ContextLength()
|
||||
}
|
||||
// The scheduler waits to set expiresAt, so if a model is loading it's
|
||||
// possible that it will be set to the unix epoch. For those cases, just
|
||||
|
||||
@@ -804,6 +804,7 @@ func (s *mockLlm) GetPort() int { return -
|
||||
func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return nil }
|
||||
func (s *mockLlm) HasExited() bool { return false }
|
||||
func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil }
|
||||
func (s *mockLlm) ContextLength() int { return 0 }
|
||||
|
||||
// TestImageGenRunnerCanBeEvicted verifies that an image generation model
|
||||
// loaded in the scheduler can be evicted when idle.
|
||||
|
||||
@@ -347,6 +347,11 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
|
||||
return s.vramSize
|
||||
}
|
||||
|
||||
// Context length is not applicable for image generation.
|
||||
func (s *Server) ContextLength() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
|
||||
return nil, 0, errors.New("not supported")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user