fix(agents): filter cooldown eta by model

2026-03-27 09:21:35 +07:00 · 2026-03-25 12:33:10 +03:00
parent 85644e72ee
commit 7c488c070c
4 changed files with 135 additions and 13 deletions
--- a/src/agents/auth-profiles.getsoonestcooldownexpiry.test.ts
+++ b/src/agents/auth-profiles.getsoonestcooldownexpiry.test.ts
@@ -74,4 +74,45 @@ describe("getSoonestCooldownExpiry", () => {

    expect(getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"])).toBe(1_700_000_000_000);
  });
+
+  it("ignores unrelated model-scoped rate limits for the requested model", () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({
+      "openai:p1": {
+        cooldownUntil: now + 10_000, // soonest expiry in the store
+        cooldownReason: "rate_limit",
+        cooldownModel: "gpt-5.4", // differs from the requested model, so p1 should be skipped
+      },
+      "openai:p2": {
+        cooldownUntil: now + 30_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "gpt-5.2", // same as forModel below, so p2's expiry should count
+      },
+    });
+
+    expect(
+      getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"], { now, forModel: "gpt-5.2" }),
+    ).toBe(now + 30_000); // p2's expiry, not p1's earlier but unrelated one
+  });
+
+  it("still counts profile-wide disables for other models", () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({
+      "openai:p1": {
+        cooldownUntil: now + 10_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "gpt-5.4", // differs from the requested model...
+        disabledUntil: now + 20_000, // ...but a disable window is also set, so p1 must not be skipped
+      },
+      "openai:p2": {
+        cooldownUntil: now + 30_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "gpt-5.2", // same as forModel below
+      },
+    });
+
+    expect(
+      getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"], { now, forModel: "gpt-5.2" }),
+    ).toBe(now + 20_000); // p1's disabledUntil, which is ahead of p2's now + 30_000
+  });
 });
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -72,21 +72,15 @@ export function isProfileInCooldown(
  if (!stats) {
    return false;
  }
+  const ts = now ?? Date.now();
  // Model-aware bypass: if the cooldown was caused by a rate_limit on a
  // specific model and the caller is requesting a *different* model, allow it.
  // We still honour any active billing/auth disable (`disabledUntil`) — those
  // are profile-wide and must not be short-circuited by model scoping.
-  if (
-    forModel &&
-    stats.cooldownReason === "rate_limit" &&
-    stats.cooldownModel &&
-    stats.cooldownModel !== forModel &&
-    !isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now())
-  ) {
+  if (shouldBypassModelScopedCooldown(stats, ts, forModel)) {
    return false;
  }
  const unusableUntil = resolveProfileUnusableUntil(stats);
-  const ts = now ?? Date.now();
  return unusableUntil ? ts < unusableUntil : false;
 }

@@ -181,13 +175,18 @@ export function resolveProfilesUnavailableReason(params: {
 export function getSoonestCooldownExpiry(
  store: AuthProfileStore,
  profileIds: string[],
+  options?: { now?: number; forModel?: string },
 ): number | null {
+  const ts = options?.now ?? Date.now();
  let soonest: number | null = null;
  for (const id of profileIds) {
    const stats = store.usageStats?.[id];
    if (!stats) {
      continue;
    }
+    if (shouldBypassModelScopedCooldown(stats, ts, options?.forModel)) {
+      continue;
+    }
    const until = resolveProfileUnusableUntil(stats);
    if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) {
      continue;
@@ -199,6 +198,20 @@ export function getSoonestCooldownExpiry(
  return soonest;
 }

+function shouldBypassModelScopedCooldown( // true when a cooldown recorded for another model should not block `forModel`
+  stats: Pick<ProfileUsageStats, "cooldownReason" | "cooldownModel" | "disabledUntil">,
+  now: number, // timestamp passed to the disabledUntil window check
+  forModel?: string, // model being requested; a missing or empty value never bypasses
+): boolean {
+  return !!(
+    forModel &&
+    stats.cooldownReason === "rate_limit" && // only rate-limit cooldowns qualify
+    stats.cooldownModel && // the cooldown must be tagged with a model...
+    stats.cooldownModel !== forModel && // ...that differs from the requested one
+    !isActiveUnusableWindow(stats.disabledUntil, now) // no bypass while a disable window is active (helper defined out of view; confirm semantics)
+  );
+}
+
 /**
 * Clear expired cooldowns from all profiles in the store.
 *
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -736,6 +736,62 @@ describe("runWithModelFallback", () => {
    });
  });

+  it("filters fallback summary cooldown expiry to attempted model scopes", async () => {
+    const now = Date.now();
+    const unrelatedExpiry = now + 15_000; // sooner, but tied to a model that is not a candidate
+    const relevantExpiry = now + 90_000; // later, tied to the fallback model itself
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "anthropic/claude-opus-4-5",
+            fallbacks: ["openai/gpt-5.2"],
+          },
+        },
+      },
+    });
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        "anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" },
+        "openai:default": { type: "api_key", provider: "openai", key: "openai-key" },
+      },
+      usageStats: {
+        "anthropic:default": {
+          cooldownUntil: unrelatedExpiry,
+          cooldownReason: "rate_limit",
+          cooldownModel: "claude-haiku-3-5", // neither the primary nor the fallback model
+          failureCounts: { rate_limit: 1 },
+        },
+        "openai:default": {
+          cooldownUntil: relevantExpiry,
+          cooldownReason: "rate_limit",
+          cooldownModel: "gpt-5.2", // same model as the configured fallback
+          failureCounts: { rate_limit: 1 },
+        },
+      },
+    };
+
+    await withTempAuthStore(store, async (tempDir) => {
+      const run = vi
+        .fn()
+        .mockRejectedValue(Object.assign(new Error("rate limited"), { status: 429 })); // each call to run rejects with a 429-status error
+
+      await expect(
+        runWithModelFallback({
+          cfg,
+          provider: "anthropic",
+          model: "claude-opus-4-5",
+          agentDir: tempDir,
+          run,
+        }),
+      ).rejects.toMatchObject({
+        name: "FallbackSummaryError",
+        soonestCooldownExpiry: relevantExpiry, // the earlier unrelatedExpiry must not be reported
+      });
+    });
+  });
+
  it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => {
    const cfg = makeFallbacksOnlyCfg();

--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -252,19 +252,26 @@ function resolveFallbackSoonestCooldownExpiry(params: {
    readOnly: true,
    allowKeychainPrompt: false,
  });
-  const allProfileIds = new Set<string>();
+  let soonest: number | null = null;
  for (const candidate of params.candidates) {
    const ids = resolveAuthProfileOrder({
      cfg: params.cfg,
      store: refreshedStore,
      provider: candidate.provider,
    });
-    for (const id of ids) {
-      allProfileIds.add(id);
+    const candidateSoonest = getSoonestCooldownExpiry(refreshedStore, ids, {
+      forModel: candidate.model,
+    });
+    if (
+      typeof candidateSoonest === "number" &&
+      Number.isFinite(candidateSoonest) &&
+      (soonest === null || candidateSoonest < soonest)
+    ) {
+      soonest = candidateSoonest;
    }
  }

-  return getSoonestCooldownExpiry(refreshedStore, [...allProfileIds]);
+  return soonest;
 }

 function resolveImageFallbackCandidates(params: {
@@ -452,6 +459,7 @@ function shouldProbePrimaryDuringCooldown(params: {
  throttleKey: string;
  authStore: ReturnType<typeof ensureAuthProfileStore>;
  profileIds: string[];
+  model: string;
 }): boolean {
  if (!params.isPrimary || !params.hasFallbackCandidates) {
    return false;
@@ -461,7 +469,10 @@ function shouldProbePrimaryDuringCooldown(params: {
    return false;
  }

-  const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
+  const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds, {
+    now: params.now,
+    forModel: params.model,
+  });
  if (soonest === null || !Number.isFinite(soonest)) {
    return true;
  }
@@ -512,6 +523,7 @@ function resolveCooldownDecision(params: {
    throttleKey: params.probeThrottleKey,
    authStore: params.authStore,
    profileIds: params.profileIds,
+    model: params.candidate.model,
  });

  const inferredReason =