fix(agents): filter cooldown eta by model

This commit is contained in:
Altay
2026-03-25 12:33:10 +03:00
parent 85644e72ee
commit 7c488c070c
4 changed files with 135 additions and 13 deletions

View File

@@ -74,4 +74,45 @@ describe("getSoonestCooldownExpiry", () => {
expect(getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"])).toBe(1_700_000_000_000);
});
// p1 is rate limited for gpt-5.4 and expires sooner than p2, but the caller is
// asking about gpt-5.2, so only p2's cooldown may be reported.
it("ignores unrelated model-scoped rate limits for the requested model", () => {
  const now = 1_700_000_000_000;
  const profileIds = ["openai:p1", "openai:p2"];
  const store = makeStore({
    "openai:p1": {
      cooldownUntil: now + 10_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.4",
    },
    "openai:p2": {
      cooldownUntil: now + 30_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.2",
    },
  });

  const soonest = getSoonestCooldownExpiry(store, profileIds, { now, forModel: "gpt-5.2" });

  expect(soonest).toBe(now + 30_000);
});
// p1's rate limit is scoped to gpt-5.4, yet the profile also carries an active
// `disabledUntil` window. The expected ETA is that disable window (now + 20s),
// not p2's later gpt-5.2 cooldown.
it("still counts profile-wide disables for other models", () => {
  const now = 1_700_000_000_000;
  const profileIds = ["openai:p1", "openai:p2"];
  const store = makeStore({
    "openai:p1": {
      cooldownUntil: now + 10_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.4",
      disabledUntil: now + 20_000,
    },
    "openai:p2": {
      cooldownUntil: now + 30_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.2",
    },
  });

  const soonest = getSoonestCooldownExpiry(store, profileIds, { now, forModel: "gpt-5.2" });

  expect(soonest).toBe(now + 20_000);
});
});

View File

@@ -72,21 +72,15 @@ export function isProfileInCooldown(
if (!stats) {
return false;
}
const ts = now ?? Date.now();
// Model-aware bypass: if the cooldown was caused by a rate_limit on a
// specific model and the caller is requesting a *different* model, allow it.
// We still honour any active billing/auth disable (`disabledUntil`) — those
// are profile-wide and must not be short-circuited by model scoping.
if (
forModel &&
stats.cooldownReason === "rate_limit" &&
stats.cooldownModel &&
stats.cooldownModel !== forModel &&
!isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now())
) {
if (shouldBypassModelScopedCooldown(stats, ts, forModel)) {
return false;
}
const unusableUntil = resolveProfileUnusableUntil(stats);
const ts = now ?? Date.now();
return unusableUntil ? ts < unusableUntil : false;
}
@@ -181,13 +175,18 @@ export function resolveProfilesUnavailableReason(params: {
export function getSoonestCooldownExpiry(
store: AuthProfileStore,
profileIds: string[],
options?: { now?: number; forModel?: string },
): number | null {
const ts = options?.now ?? Date.now();
let soonest: number | null = null;
for (const id of profileIds) {
const stats = store.usageStats?.[id];
if (!stats) {
continue;
}
if (shouldBypassModelScopedCooldown(stats, ts, options?.forModel)) {
continue;
}
const until = resolveProfileUnusableUntil(stats);
if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) {
continue;
@@ -199,6 +198,20 @@ export function getSoonestCooldownExpiry(
return soonest;
}
/**
 * Decide whether a profile's cooldown can be ignored for the requested model.
 *
 * The cooldown is bypassed only when every condition holds:
 * - a model was requested (`forModel` is non-empty),
 * - the cooldown was recorded with reason `"rate_limit"`,
 * - the cooldown names a specific model that differs from `forModel`, and
 * - `isActiveUnusableWindow` reports no active `disabledUntil` window at `now`.
 *
 * @param stats - Cooldown/disable fields of the profile's usage stats.
 * @param now - Timestamp against which the `disabledUntil` window is checked.
 * @param forModel - Model the caller wants to use; when omitted nothing is bypassed.
 * @returns `true` when the cooldown does not apply to `forModel`.
 */
function shouldBypassModelScopedCooldown(
  stats: Pick<ProfileUsageStats, "cooldownReason" | "cooldownModel" | "disabledUntil">,
  now: number,
  forModel?: string,
): boolean {
  if (!forModel) {
    return false;
  }
  if (stats.cooldownReason !== "rate_limit") {
    return false;
  }
  const scopedModel = stats.cooldownModel;
  if (!scopedModel || scopedModel === forModel) {
    return false;
  }
  // An active disable window always wins over model scoping.
  return !isActiveUnusableWindow(stats.disabledUntil, now);
}
/**
* Clear expired cooldowns from all profiles in the store.
*

View File

@@ -736,6 +736,62 @@ describe("runWithModelFallback", () => {
});
});
// The anthropic profile is cooling down for claude-haiku-3-5, which is neither
// the primary (claude-opus-4-5) nor the fallback model, and expires first. The
// openai profile is cooling down for gpt-5.2, the configured fallback. With
// every run attempt rejecting with a 429, the summary error is expected to
// report the gpt-5.2 expiry rather than the sooner, unrelated one.
it("filters fallback summary cooldown expiry to attempted model scopes", async () => {
  const now = Date.now();
  const unrelatedExpiry = now + 15_000;
  const relevantExpiry = now + 90_000;

  const cfg = makeCfg({
    agents: {
      defaults: {
        model: {
          primary: "anthropic/claude-opus-4-5",
          fallbacks: ["openai/gpt-5.2"],
        },
      },
    },
  });

  const store: AuthProfileStore = {
    version: AUTH_STORE_VERSION,
    profiles: {
      "anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" },
      "openai:default": { type: "api_key", provider: "openai", key: "openai-key" },
    },
    usageStats: {
      "anthropic:default": {
        cooldownUntil: unrelatedExpiry,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-haiku-3-5",
        failureCounts: { rate_limit: 1 },
      },
      "openai:default": {
        cooldownUntil: relevantExpiry,
        cooldownReason: "rate_limit",
        cooldownModel: "gpt-5.2",
        failureCounts: { rate_limit: 1 },
      },
    },
  };

  await withTempAuthStore(store, async (tempDir) => {
    const rateLimitError = Object.assign(new Error("rate limited"), { status: 429 });
    const alwaysRateLimited = vi.fn().mockRejectedValue(rateLimitError);

    const outcome = runWithModelFallback({
      cfg,
      provider: "anthropic",
      model: "claude-opus-4-5",
      agentDir: tempDir,
      run: alwaysRateLimited,
    });

    await expect(outcome).rejects.toMatchObject({
      name: "FallbackSummaryError",
      soonestCooldownExpiry: relevantExpiry,
    });
  });
});
it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => {
const cfg = makeFallbacksOnlyCfg();

View File

@@ -252,19 +252,26 @@ function resolveFallbackSoonestCooldownExpiry(params: {
readOnly: true,
allowKeychainPrompt: false,
});
const allProfileIds = new Set<string>();
let soonest: number | null = null;
for (const candidate of params.candidates) {
const ids = resolveAuthProfileOrder({
cfg: params.cfg,
store: refreshedStore,
provider: candidate.provider,
});
for (const id of ids) {
allProfileIds.add(id);
const candidateSoonest = getSoonestCooldownExpiry(refreshedStore, ids, {
forModel: candidate.model,
});
if (
typeof candidateSoonest === "number" &&
Number.isFinite(candidateSoonest) &&
(soonest === null || candidateSoonest < soonest)
) {
soonest = candidateSoonest;
}
}
return getSoonestCooldownExpiry(refreshedStore, [...allProfileIds]);
return soonest;
}
function resolveImageFallbackCandidates(params: {
@@ -452,6 +459,7 @@ function shouldProbePrimaryDuringCooldown(params: {
throttleKey: string;
authStore: ReturnType<typeof ensureAuthProfileStore>;
profileIds: string[];
model: string;
}): boolean {
if (!params.isPrimary || !params.hasFallbackCandidates) {
return false;
@@ -461,7 +469,10 @@ function shouldProbePrimaryDuringCooldown(params: {
return false;
}
const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds, {
now: params.now,
forModel: params.model,
});
if (soonest === null || !Number.isFinite(soonest)) {
return true;
}
@@ -512,6 +523,7 @@ function resolveCooldownDecision(params: {
throttleKey: params.probeThrottleKey,
authStore: params.authStore,
profileIds: params.profileIds,
model: params.candidate.model,
});
const inferredReason =