diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts index 7ccdc9dd4b5..1dd20d03e81 100644 --- a/src/agents/auth-profiles/usage.test.ts +++ b/src/agents/auth-profiles/usage.test.ts @@ -812,6 +812,23 @@ describe("markAuthProfileFailure — per-model cooldown metadata", () => { expect(stats?.cooldownModel).toBe("claude-sonnet-4.6"); }); + it("widens cooldownModel when rate_limit failure during active cooldown has no modelId", async () => { + const now = 1_000_000; + const store = makeStoreWithCopilot({ + "github-copilot:github": { + cooldownUntil: now + 30_000, + cooldownReason: "rate_limit", + cooldownModel: "claude-sonnet-4.6", + errorCount: 1, + lastFailureAt: now - 1000, + }, + }); + await markFailure({ store, now, modelId: undefined }); + const stats = store.usageStats?.["github-copilot:github"]; + expect(stats?.cooldownReason).toBe("rate_limit"); + expect(stats?.cooldownModel).toBeUndefined(); + }); + it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => { const now = 1_000_000; const store = makeStoreWithCopilot({ diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index f770d056e1d..8efa250ab5f 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -511,6 +511,14 @@ function computeNextProfileUsageStats(params: { params.existing.cooldownModel !== params.modelId ) { updatedStats.cooldownModel = undefined; + } else if ( + params.reason === "rate_limit" && + !params.modelId && + params.existing.cooldownModel + ) { + // Unknown originating model during an active model-scoped cooldown: + // widen scope conservatively so no model can bypass on stale metadata. + updatedStats.cooldownModel = undefined; } else if (params.reason !== "rate_limit") { // Non-rate-limit failures are profile-wide — clear model scope even // when the same model fails, so that no model can bypass. diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 1476986c3bb..75377bc67b5 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -83,13 +83,14 @@ export type AgentRunLoopResult = * Build a human-friendly rate-limit message from a FallbackSummaryError. * Includes a countdown when the soonest cooldown expiry is known. */ -function buildCopilotCooldownMessage(err: unknown): string { +function buildRateLimitCooldownMessage(err: unknown): string { if (!isFallbackSummaryError(err)) { return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes."; } const expiry = err.soonestCooldownExpiry; - if (typeof expiry === "number" && expiry > Date.now()) { - const secsLeft = Math.ceil((expiry - Date.now()) / 1000); + const now = Date.now(); + if (typeof expiry === "number" && expiry > now) { + const secsLeft = Math.max(1, Math.ceil((expiry - now) / 1000)); if (secsLeft <= 60) { return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`; } @@ -702,7 +703,7 @@ export async function runAgentTurnWithFallback(params: { const fallbackText = isBilling ? BILLING_ERROR_USER_MESSAGE : isRateLimit - ? buildCopilotCooldownMessage(err) + ? buildRateLimitCooldownMessage(err) : isContextOverflow ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model." : isRoleOrderingError