Files
ollama/envconfig/config.go
Devon Rifkin 46cb7795e1 add ability to turn on debug request logging (#14106)
If `OLLAMA_DEBUG_LOG_REQUESTS` is set, then on server startup a temp
folder will be created. Upon any inference request, the body will be
logged to a file in this folder, as well as a small shell script to
"replay" the request using cURL.

This is just intended for debugging scenarios, not as something to turn
on normally.
2026-03-19 17:08:17 -07:00

457 lines
15 KiB
Go

package envconfig
import (
"encoding/json"
"errors"
"fmt"
"log/slog"
"math"
"net"
"net/url"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
)
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Default is scheme "http" and host "127.0.0.1:11434"
func Host() *url.URL {
defaultPort := "11434"
s := strings.TrimSpace(Var("OLLAMA_HOST"))
scheme, hostport, ok := strings.Cut(s, "://")
switch {
case !ok:
scheme, hostport = "http", s
if s == "ollama.com" {
scheme, hostport = "https", "ollama.com:443"
}
case scheme == "http":
defaultPort = "80"
case scheme == "https":
defaultPort = "443"
}
hostport, path, _ := strings.Cut(hostport, "/")
host, port, err := net.SplitHostPort(hostport)
if err != nil {
host, port = "127.0.0.1", defaultPort
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
host = ip.String()
} else if hostport != "" {
host = hostport
}
}
if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
port = defaultPort
}
return &url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, port),
Path: path,
}
}
// ConnectableHost returns Host() with unspecified bind addresses (0.0.0.0, ::)
// replaced by the corresponding loopback address (127.0.0.1, ::1).
// Unspecified addresses are valid for binding a server socket but not for
// connecting as a client, which fails on Windows.
func ConnectableHost() *url.URL {
u := Host()
host, port, err := net.SplitHostPort(u.Host)
if err != nil {
return u
}
if ip := net.ParseIP(host); ip != nil && ip.IsUnspecified() {
if ip.To4() != nil {
host = "127.0.0.1"
} else {
host = "::1"
}
u.Host = net.JoinHostPort(host, port)
}
return u
}
// AllowedOrigins returns a list of allowed origins. AllowedOrigins can be configured via the OLLAMA_ORIGINS environment variable.
func AllowedOrigins() (origins []string) {
if s := Var("OLLAMA_ORIGINS"); s != "" {
origins = strings.Split(s, ",")
}
for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
origins = append(origins,
fmt.Sprintf("http://%s", origin),
fmt.Sprintf("https://%s", origin),
fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
)
}
origins = append(origins,
"app://*",
"file://*",
"tauri://*",
"vscode-webview://*",
"vscode-file://*",
)
return origins
}
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Default is $HOME/.ollama/models
func Models() string {
if s := Var("OLLAMA_MODELS"); s != "" {
return s
}
home, err := os.UserHomeDir()
if err != nil {
panic(err)
}
return filepath.Join(home, ".ollama", "models")
}
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
// Negative values are treated as infinite. Zero is treated as no keep alive.
// Default is 5 minutes.
func KeepAlive() (keepAlive time.Duration) {
keepAlive = 5 * time.Minute
if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
if d, err := time.ParseDuration(s); err == nil {
keepAlive = d
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
keepAlive = time.Duration(n) * time.Second
}
}
if keepAlive < 0 {
return time.Duration(math.MaxInt64)
}
return keepAlive
}
// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
// Zero or Negative values are treated as infinite.
// Default is 5 minutes.
func LoadTimeout() (loadTimeout time.Duration) {
loadTimeout = 5 * time.Minute
if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
if d, err := time.ParseDuration(s); err == nil {
loadTimeout = d
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
loadTimeout = time.Duration(n) * time.Second
}
}
if loadTimeout <= 0 {
return time.Duration(math.MaxInt64)
}
return loadTimeout
}
func Remotes() []string {
var r []string
raw := strings.TrimSpace(Var("OLLAMA_REMOTES"))
if raw == "" {
r = []string{"ollama.com"}
} else {
r = strings.Split(raw, ",")
}
return r
}
func BoolWithDefault(k string) func(defaultValue bool) bool {
return func(defaultValue bool) bool {
if s := Var(k); s != "" {
b, err := strconv.ParseBool(s)
if err != nil {
return true
}
return b
}
return defaultValue
}
}
func Bool(k string) func() bool {
withDefault := BoolWithDefault(k)
return func() bool {
return withDefault(false)
}
}
// LogLevel returns the log level for the application.
// Values are 0 or false INFO (Default), 1 or true DEBUG, 2 TRACE
func LogLevel() slog.Level {
level := slog.LevelInfo
if s := Var("OLLAMA_DEBUG"); s != "" {
if b, _ := strconv.ParseBool(s); b {
level = slog.LevelDebug
} else if i, _ := strconv.ParseInt(s, 10, 64); i != 0 {
level = slog.Level(i * -4)
}
}
return level
}
var (
// FlashAttention enables the experimental flash attention feature.
FlashAttention = BoolWithDefault("OLLAMA_FLASH_ATTENTION")
// DebugLogRequests logs inference requests to disk for replay/debugging.
DebugLogRequests = Bool("OLLAMA_DEBUG_LOG_REQUESTS")
// KvCacheType is the quantization type for the K/V cache.
KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
// NoHistory disables readline history.
NoHistory = Bool("OLLAMA_NOHISTORY")
// NoPrune disables pruning of model blobs on startup.
NoPrune = Bool("OLLAMA_NOPRUNE")
// SchedSpread allows scheduling models across all GPUs.
SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
// MultiUserCache optimizes prompt caching for multi-user scenarios
MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
// Enable the new Ollama engine
NewEngine = Bool("OLLAMA_NEW_ENGINE")
// ContextLength sets the default context length
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 0)
// Auth enables authentication between the Ollama client and server
UseAuth = Bool("OLLAMA_AUTH")
// Enable Vulkan backend
EnableVulkan = Bool("OLLAMA_VULKAN")
// NoCloudEnv checks the OLLAMA_NO_CLOUD environment variable.
NoCloudEnv = Bool("OLLAMA_NO_CLOUD")
)
func String(s string) func() string {
return func() string {
return Var(s)
}
}
var (
LLMLibrary = String("OLLAMA_LLM_LIBRARY")
Editor = String("OLLAMA_EDITOR")
CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
VkVisibleDevices = String("GGML_VK_VISIBLE_DEVICES")
GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
func Uint(key string, defaultValue uint) func() uint {
return func() uint {
if s := Var(key); s != "" {
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
} else {
return uint(n)
}
}
return defaultValue
}
}
var (
// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
NumParallel = Uint("OLLAMA_NUM_PARALLEL", 1)
// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
)
func Uint64(key string, defaultValue uint64) func() uint64 {
return func() uint64 {
if s := Var(key); s != "" {
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
} else {
return n
}
}
return defaultValue
}
}
// Set aside VRAM per GPU
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
type EnvVar struct {
Name string
Value any
Description string
}
func AsMap() map[string]EnvVar {
ret := map[string]EnvVar{
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
"OLLAMA_DEBUG_LOG_REQUESTS": {"OLLAMA_DEBUG_LOG_REQUESTS", DebugLogRequests(), "Log inference request bodies and replay curl commands to a temp directory"},
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
"OLLAMA_NO_CLOUD": {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
"OLLAMA_EDITOR": {"OLLAMA_EDITOR", Editor(), "Path to editor for interactive prompt editing (Ctrl+G)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
// Informational
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
"NO_PROXY": {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
}
if runtime.GOOS != "windows" {
// Windows environment variables are case-insensitive so there's no need to duplicate them
ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
}
if runtime.GOOS != "darwin" {
ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible by numeric ID"}
ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible by UUID or numeric ID"}
ret["GGML_VK_VISIBLE_DEVICES"] = EnvVar{"GGML_VK_VISIBLE_DEVICES", VkVisibleDevices(), "Set which Vulkan devices are visible by numeric ID"}
ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
ret["OLLAMA_VULKAN"] = EnvVar{"OLLAMA_VULKAN", EnableVulkan(), "Enable experimental Vulkan support"}
}
return ret
}
func Values() map[string]string {
vals := make(map[string]string)
for k, v := range AsMap() {
vals[k] = fmt.Sprintf("%v", v.Value)
}
return vals
}
// Var returns an environment variable stripped of leading and trailing quotes or spaces
func Var(key string) string {
return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}
// serverConfigData holds the parsed fields from ~/.ollama/server.json.
type serverConfigData struct {
DisableOllamaCloud bool `json:"disable_ollama_cloud,omitempty"`
}
var (
serverCfgMu sync.RWMutex
serverCfgLoaded bool
serverCfg serverConfigData
)
func loadServerConfig() {
serverCfgMu.RLock()
if serverCfgLoaded {
serverCfgMu.RUnlock()
return
}
serverCfgMu.RUnlock()
cfg := serverConfigData{}
home, err := os.UserHomeDir()
if err == nil {
path := filepath.Join(home, ".ollama", "server.json")
data, err := os.ReadFile(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
slog.Debug("envconfig: could not read server config", "error", err)
}
} else if err := json.Unmarshal(data, &cfg); err != nil {
slog.Debug("envconfig: could not parse server config", "error", err)
}
}
serverCfgMu.Lock()
defer serverCfgMu.Unlock()
if serverCfgLoaded {
return
}
serverCfg = cfg
serverCfgLoaded = true
}
func cachedServerConfig() serverConfigData {
serverCfgMu.RLock()
defer serverCfgMu.RUnlock()
return serverCfg
}
// ReloadServerConfig refreshes the cached ~/.ollama/server.json settings.
func ReloadServerConfig() {
serverCfgMu.Lock()
serverCfgLoaded = false
serverCfg = serverConfigData{}
serverCfgMu.Unlock()
loadServerConfig()
}
// NoCloud returns true if Ollama cloud features are disabled,
// checking both the OLLAMA_NO_CLOUD environment variable and
// the disable_ollama_cloud field in ~/.ollama/server.json.
func NoCloud() bool {
if NoCloudEnv() {
return true
}
loadServerConfig()
return cachedServerConfig().DisableOllamaCloud
}
// NoCloudSource returns the source of the cloud-disabled decision.
// Returns "none", "env", "config", or "both".
func NoCloudSource() string {
envDisabled := NoCloudEnv()
loadServerConfig()
configDisabled := cachedServerConfig().DisableOllamaCloud
switch {
case envDisabled && configDisabled:
return "both"
case envDisabled:
return "env"
case configDisabled:
return "config"
default:
return "none"
}
}