diff --git a/src/agents/auth-profiles.getsoonestcooldownexpiry.test.ts b/src/agents/auth-profiles.getsoonestcooldownexpiry.test.ts index acc6777c064..e221d2eaf6a 100644 --- a/src/agents/auth-profiles.getsoonestcooldownexpiry.test.ts +++ b/src/agents/auth-profiles.getsoonestcooldownexpiry.test.ts @@ -74,4 +74,45 @@ describe("getSoonestCooldownExpiry", () => { expect(getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"])).toBe(1_700_000_000_000); }); + + it("ignores unrelated model-scoped rate limits for the requested model", () => { + const now = 1_700_000_000_000; + const store = makeStore({ + "openai:p1": { + cooldownUntil: now + 10_000, + cooldownReason: "rate_limit", + cooldownModel: "gpt-5.4", + }, + "openai:p2": { + cooldownUntil: now + 30_000, + cooldownReason: "rate_limit", + cooldownModel: "gpt-5.2", + }, + }); + + expect( + getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"], { now, forModel: "gpt-5.2" }), + ).toBe(now + 30_000); + }); + + it("still counts profile-wide disables for other models", () => { + const now = 1_700_000_000_000; + const store = makeStore({ + "openai:p1": { + cooldownUntil: now + 10_000, + cooldownReason: "rate_limit", + cooldownModel: "gpt-5.4", + disabledUntil: now + 20_000, + }, + "openai:p2": { + cooldownUntil: now + 30_000, + cooldownReason: "rate_limit", + cooldownModel: "gpt-5.2", + }, + }); + + expect( + getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"], { now, forModel: "gpt-5.2" }), + ).toBe(now + 20_000); + }); }); diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 8efa250ab5f..1418b3bae90 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -72,21 +72,15 @@ export function isProfileInCooldown( if (!stats) { return false; } + const ts = now ?? Date.now(); // Model-aware bypass: if the cooldown was caused by a rate_limit on a // specific model and the caller is requesting a *different* model, allow it. // We still honour any active billing/auth disable (`disabledUntil`) — those // are profile-wide and must not be short-circuited by model scoping. - if ( - forModel && - stats.cooldownReason === "rate_limit" && - stats.cooldownModel && - stats.cooldownModel !== forModel && - !isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now()) - ) { + if (shouldBypassModelScopedCooldown(stats, ts, forModel)) { return false; } const unusableUntil = resolveProfileUnusableUntil(stats); - const ts = now ?? Date.now(); return unusableUntil ? ts < unusableUntil : false; } @@ -181,13 +175,18 @@ export function resolveProfilesUnavailableReason(params: { export function getSoonestCooldownExpiry( store: AuthProfileStore, profileIds: string[], + options?: { now?: number; forModel?: string }, ): number | null { + const ts = options?.now ?? Date.now(); let soonest: number | null = null; for (const id of profileIds) { const stats = store.usageStats?.[id]; if (!stats) { continue; } + if (shouldBypassModelScopedCooldown(stats, ts, options?.forModel)) { + continue; + } const until = resolveProfileUnusableUntil(stats); if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) { continue; @@ -199,6 +198,20 @@ export function getSoonestCooldownExpiry( return soonest; } +function shouldBypassModelScopedCooldown( + stats: Pick, + now: number, + forModel?: string, +): boolean { + return !!( + forModel && + stats.cooldownReason === "rate_limit" && + stats.cooldownModel && + stats.cooldownModel !== forModel && + !isActiveUnusableWindow(stats.disabledUntil, now) + ); +} + /** * Clear expired cooldowns from all profiles in the store. * diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index ee851a069b8..f734ec9ec2b 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -736,6 +736,62 @@ describe("runWithModelFallback", () => { }); }); + it("filters fallback summary cooldown expiry to attempted model scopes", async () => { + const now = Date.now(); + const unrelatedExpiry = now + 15_000; + const relevantExpiry = now + 90_000; + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-5", + fallbacks: ["openai/gpt-5.2"], + }, + }, + }, + }); + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" }, + "openai:default": { type: "api_key", provider: "openai", key: "openai-key" }, + }, + usageStats: { + "anthropic:default": { + cooldownUntil: unrelatedExpiry, + cooldownReason: "rate_limit", + cooldownModel: "claude-haiku-3-5", + failureCounts: { rate_limit: 1 }, + }, + "openai:default": { + cooldownUntil: relevantExpiry, + cooldownReason: "rate_limit", + cooldownModel: "gpt-5.2", + failureCounts: { rate_limit: 1 }, + }, + }, + }; + + await withTempAuthStore(store, async (tempDir) => { + const run = vi + .fn() + .mockRejectedValue(Object.assign(new Error("rate limited"), { status: 429 })); + + await expect( + runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-5", + agentDir: tempDir, + run, + }), + ).rejects.toMatchObject({ + name: "FallbackSummaryError", + soonestCooldownExpiry: relevantExpiry, + }); + }); + }); + it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => { const cfg = makeFallbacksOnlyCfg(); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index c7918506405..901cee89659 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -252,19 +252,26 @@ function resolveFallbackSoonestCooldownExpiry(params: { readOnly: true, allowKeychainPrompt: false, }); - const allProfileIds = new Set(); + let soonest: number | null = null; for (const candidate of params.candidates) { const ids = resolveAuthProfileOrder({ cfg: params.cfg, store: refreshedStore, provider: candidate.provider, }); - for (const id of ids) { - allProfileIds.add(id); + const candidateSoonest = getSoonestCooldownExpiry(refreshedStore, ids, { + forModel: candidate.model, + }); + if ( + typeof candidateSoonest === "number" && + Number.isFinite(candidateSoonest) && + (soonest === null || candidateSoonest < soonest) + ) { + soonest = candidateSoonest; } } - return getSoonestCooldownExpiry(refreshedStore, [...allProfileIds]); + return soonest; } function resolveImageFallbackCandidates(params: { @@ -452,6 +459,7 @@ function shouldProbePrimaryDuringCooldown(params: { throttleKey: string; authStore: ReturnType; profileIds: string[]; + model: string; }): boolean { if (!params.isPrimary || !params.hasFallbackCandidates) { return false; @@ -461,7 +469,10 @@ function shouldProbePrimaryDuringCooldown(params: { return false; } - const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds); + const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds, { + now: params.now, + forModel: params.model, + }); if (soonest === null || !Number.isFinite(soonest)) { return true; } @@ -512,6 +523,7 @@ function resolveCooldownDecision(params: { throttleKey: params.probeThrottleKey, authStore: params.authStore, profileIds: params.profileIds, + model: params.candidate.model, }); const inferredReason =