fix(agents): filter cooldown eta by model

This commit is contained in:
Altay
2026-03-25 12:33:10 +03:00
parent 85644e72ee
commit 7c488c070c
4 changed files with 135 additions and 13 deletions

View File

@@ -74,4 +74,45 @@ describe("getSoonestCooldownExpiry", () => {
expect(getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"])).toBe(1_700_000_000_000); expect(getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"])).toBe(1_700_000_000_000);
}); });
// Both profiles are rate-limited, but each cooldown is scoped to a specific model.
// When the ETA is requested for "gpt-5.2", the profile whose cooldown is scoped
// to "gpt-5.4" must be skipped even though it would expire sooner.
it("ignores unrelated model-scoped rate limits for the requested model", () => {
const now = 1_700_000_000_000;
const store = makeStore({
// Expires first, but the rate limit was recorded against a different model.
"openai:p1": {
cooldownUntil: now + 10_000,
cooldownReason: "rate_limit",
cooldownModel: "gpt-5.4",
},
// Scoped to the requested model, so this is the only cooldown that counts.
"openai:p2": {
cooldownUntil: now + 30_000,
cooldownReason: "rate_limit",
cooldownModel: "gpt-5.2",
},
});
expect(
getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"], { now, forModel: "gpt-5.2" }),
).toBe(now + 30_000);
});
// A profile whose rate limit targets another model is normally skipped, but an
// active `disabledUntil` window is profile-wide and must still contribute to the ETA.
it("still counts profile-wide disables for other models", () => {
const now = 1_700_000_000_000;
const store = makeStore({
// Rate limit is for a different model, yet the profile is also disabled
// until now + 20_000, so it must not be bypassed.
"openai:p1": {
cooldownUntil: now + 10_000,
cooldownReason: "rate_limit",
cooldownModel: "gpt-5.4",
disabledUntil: now + 20_000,
},
"openai:p2": {
cooldownUntil: now + 30_000,
cooldownReason: "rate_limit",
cooldownModel: "gpt-5.2",
},
});
// p1's disable expiry (now + 20_000) precedes p2's cooldown (now + 30_000).
expect(
getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"], { now, forModel: "gpt-5.2" }),
).toBe(now + 20_000);
});
}); });

View File

@@ -72,21 +72,15 @@ export function isProfileInCooldown(
if (!stats) { if (!stats) {
return false; return false;
} }
const ts = now ?? Date.now();
// Model-aware bypass: if the cooldown was caused by a rate_limit on a // Model-aware bypass: if the cooldown was caused by a rate_limit on a
// specific model and the caller is requesting a *different* model, allow it. // specific model and the caller is requesting a *different* model, allow it.
// We still honour any active billing/auth disable (`disabledUntil`) — those // We still honour any active billing/auth disable (`disabledUntil`) — those
// are profile-wide and must not be short-circuited by model scoping. // are profile-wide and must not be short-circuited by model scoping.
if ( if (shouldBypassModelScopedCooldown(stats, ts, forModel)) {
forModel &&
stats.cooldownReason === "rate_limit" &&
stats.cooldownModel &&
stats.cooldownModel !== forModel &&
!isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now())
) {
return false; return false;
} }
const unusableUntil = resolveProfileUnusableUntil(stats); const unusableUntil = resolveProfileUnusableUntil(stats);
const ts = now ?? Date.now();
return unusableUntil ? ts < unusableUntil : false; return unusableUntil ? ts < unusableUntil : false;
} }
@@ -181,13 +175,18 @@ export function resolveProfilesUnavailableReason(params: {
export function getSoonestCooldownExpiry( export function getSoonestCooldownExpiry(
store: AuthProfileStore, store: AuthProfileStore,
profileIds: string[], profileIds: string[],
options?: { now?: number; forModel?: string },
): number | null { ): number | null {
const ts = options?.now ?? Date.now();
let soonest: number | null = null; let soonest: number | null = null;
for (const id of profileIds) { for (const id of profileIds) {
const stats = store.usageStats?.[id]; const stats = store.usageStats?.[id];
if (!stats) { if (!stats) {
continue; continue;
} }
if (shouldBypassModelScopedCooldown(stats, ts, options?.forModel)) {
continue;
}
const until = resolveProfileUnusableUntil(stats); const until = resolveProfileUnusableUntil(stats);
if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) { if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) {
continue; continue;
@@ -199,6 +198,20 @@ export function getSoonestCooldownExpiry(
return soonest; return soonest;
} }
/**
 * Decide whether a profile's recorded cooldown can be ignored for `forModel`.
 *
 * The cooldown is bypassed only when all of the following hold:
 * - the caller asked about a specific model (`forModel` is non-empty),
 * - the cooldown was caused by a `rate_limit`,
 * - the cooldown names a model, and that model differs from `forModel`,
 * - no `disabledUntil` window is active at `now` (such disables are
 *   profile-wide and must not be short-circuited by model scoping).
 *
 * @param stats - Usage stats for the profile (only the cooldown/disable fields are read).
 * @param now - Timestamp used to evaluate the `disabledUntil` window.
 * @param forModel - Model the caller intends to use, if any.
 * @returns `true` when the cooldown does not apply to `forModel`.
 */
function shouldBypassModelScopedCooldown(
  stats: Pick<ProfileUsageStats, "cooldownReason" | "cooldownModel" | "disabledUntil">,
  now: number,
  forModel?: string,
): boolean {
  // Without a requested model there is nothing to scope against.
  if (!forModel) {
    return false;
  }
  // Only rate limits are model-scoped; any other reason blocks the whole profile.
  if (stats.cooldownReason !== "rate_limit") {
    return false;
  }
  const limitedModel = stats.cooldownModel;
  // An unscoped rate limit, or one on the requested model itself, still applies.
  if (!limitedModel || limitedModel === forModel) {
    return false;
  }
  // An active profile-wide disable always wins over the model-scoped bypass.
  return !isActiveUnusableWindow(stats.disabledUntil, now);
}
/** /**
* Clear expired cooldowns from all profiles in the store. * Clear expired cooldowns from all profiles in the store.
* *

View File

@@ -736,6 +736,62 @@ describe("runWithModelFallback", () => {
}); });
}); });
// When every candidate fails, the summary error's `soonestCooldownExpiry` should
// only reflect cooldowns that apply to the models that were actually attempted.
it("filters fallback summary cooldown expiry to attempted model scopes", async () => {
const now = Date.now();
// Sooner expiry, but recorded against a model that is not a configured candidate.
const unrelatedExpiry = now + 15_000;
// Later expiry, recorded against the configured fallback model.
const relevantExpiry = now + 90_000;
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-5",
fallbacks: ["openai/gpt-5.2"],
},
},
},
});
const store: AuthProfileStore = {
version: AUTH_STORE_VERSION,
profiles: {
"anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" },
"openai:default": { type: "api_key", provider: "openai", key: "openai-key" },
},
usageStats: {
// Rate limit scoped to claude-haiku-3-5, while the primary is claude-opus-4-5.
"anthropic:default": {
cooldownUntil: unrelatedExpiry,
cooldownReason: "rate_limit",
cooldownModel: "claude-haiku-3-5",
failureCounts: { rate_limit: 1 },
},
// Rate limit scoped to gpt-5.2, which matches the fallback model.
"openai:default": {
cooldownUntil: relevantExpiry,
cooldownReason: "rate_limit",
cooldownModel: "gpt-5.2",
failureCounts: { rate_limit: 1 },
},
},
};
await withTempAuthStore(store, async (tempDir) => {
// Every invocation rejects with a 429-style error, so no candidate can succeed.
const run = vi
.fn()
.mockRejectedValue(Object.assign(new Error("rate limited"), { status: 429 }));
// The reported expiry must be the model-relevant one, not the sooner unrelated one.
await expect(
runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-5",
agentDir: tempDir,
run,
}),
).rejects.toMatchObject({
name: "FallbackSummaryError",
soonestCooldownExpiry: relevantExpiry,
});
});
});
it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => { it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => {
const cfg = makeFallbacksOnlyCfg(); const cfg = makeFallbacksOnlyCfg();

View File

@@ -252,19 +252,26 @@ function resolveFallbackSoonestCooldownExpiry(params: {
readOnly: true, readOnly: true,
allowKeychainPrompt: false, allowKeychainPrompt: false,
}); });
const allProfileIds = new Set<string>(); let soonest: number | null = null;
for (const candidate of params.candidates) { for (const candidate of params.candidates) {
const ids = resolveAuthProfileOrder({ const ids = resolveAuthProfileOrder({
cfg: params.cfg, cfg: params.cfg,
store: refreshedStore, store: refreshedStore,
provider: candidate.provider, provider: candidate.provider,
}); });
for (const id of ids) { const candidateSoonest = getSoonestCooldownExpiry(refreshedStore, ids, {
allProfileIds.add(id); forModel: candidate.model,
});
if (
typeof candidateSoonest === "number" &&
Number.isFinite(candidateSoonest) &&
(soonest === null || candidateSoonest < soonest)
) {
soonest = candidateSoonest;
} }
} }
return getSoonestCooldownExpiry(refreshedStore, [...allProfileIds]); return soonest;
} }
function resolveImageFallbackCandidates(params: { function resolveImageFallbackCandidates(params: {
@@ -452,6 +459,7 @@ function shouldProbePrimaryDuringCooldown(params: {
throttleKey: string; throttleKey: string;
authStore: ReturnType<typeof ensureAuthProfileStore>; authStore: ReturnType<typeof ensureAuthProfileStore>;
profileIds: string[]; profileIds: string[];
model: string;
}): boolean { }): boolean {
if (!params.isPrimary || !params.hasFallbackCandidates) { if (!params.isPrimary || !params.hasFallbackCandidates) {
return false; return false;
@@ -461,7 +469,10 @@ function shouldProbePrimaryDuringCooldown(params: {
return false; return false;
} }
const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds); const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds, {
now: params.now,
forModel: params.model,
});
if (soonest === null || !Number.isFinite(soonest)) { if (soonest === null || !Number.isFinite(soonest)) {
return true; return true;
} }
@@ -512,6 +523,7 @@ function resolveCooldownDecision(params: {
throttleKey: params.probeThrottleKey, throttleKey: params.probeThrottleKey,
authStore: params.authStore, authStore: params.authStore,
profileIds: params.profileIds, profileIds: params.profileIds,
model: params.candidate.model,
}); });
const inferredReason = const inferredReason =