fix(agents): harden cooldown rate-limit handling

2026-03-27 09:21:35 +07:00 · 2026-03-25 12:23:08 +03:00
parent 3b336c57a4
commit 85644e72ee
3 changed files with 30 additions and 4 deletions
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -812,6 +812,23 @@ describe("markAuthProfileFailure — per-model cooldown metadata", () => {
    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
  });

+  it("widens cooldownModel when rate_limit failure during active cooldown has no modelId", async () => {
+    const now = 1_000_000;
+    const store = makeStoreWithCopilot({
+      "github-copilot:github": {
+        cooldownUntil: now + 30_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "claude-sonnet-4.6",
+        errorCount: 1,
+        lastFailureAt: now - 1000,
+      },
+    });
+    await markFailure({ store, now, modelId: undefined });
+    const stats = store.usageStats?.["github-copilot:github"];
+    expect(stats?.cooldownReason).toBe("rate_limit");
+    expect(stats?.cooldownModel).toBeUndefined();
+  });
+
  it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => {
    const now = 1_000_000;
    const store = makeStoreWithCopilot({
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -511,6 +511,14 @@ function computeNextProfileUsageStats(params: {
        params.existing.cooldownModel !== params.modelId
      ) {
        updatedStats.cooldownModel = undefined;
+      } else if (
+        params.reason === "rate_limit" &&
+        !params.modelId &&
+        params.existing.cooldownModel
+      ) {
+        // Unknown originating model during an active model-scoped cooldown:
+        // widen scope conservatively so no model can bypass on stale metadata.
+        updatedStats.cooldownModel = undefined;
      } else if (params.reason !== "rate_limit") {
        // Non-rate-limit failures are profile-wide — clear model scope even
        // when the same model fails, so that no model can bypass.
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -83,13 +83,14 @@ export type AgentRunLoopResult =
 * Build a human-friendly rate-limit message from a FallbackSummaryError.
 * Includes a countdown when the soonest cooldown expiry is known.
 */
-function buildCopilotCooldownMessage(err: unknown): string {
+function buildRateLimitCooldownMessage(err: unknown): string {
  if (!isFallbackSummaryError(err)) {
    return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";
  }
  const expiry = err.soonestCooldownExpiry;
-  if (typeof expiry === "number" && expiry > Date.now()) {
-    const secsLeft = Math.ceil((expiry - Date.now()) / 1000);
+  const now = Date.now();
+  if (typeof expiry === "number" && expiry > now) {
+    const secsLeft = Math.max(1, Math.ceil((expiry - now) / 1000));
    if (secsLeft <= 60) {
      return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`;
    }
@@ -702,7 +703,7 @@ export async function runAgentTurnWithFallback(params: {
      const fallbackText = isBilling
        ? BILLING_ERROR_USER_MESSAGE
        : isRateLimit
-          ? buildCopilotCooldownMessage(err)
+          ? buildRateLimitCooldownMessage(err)
          : isContextOverflow
            ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
            : isRoleOrderingError