From b91fa68b3130271d55363de4e0cdfbe3363af724 Mon Sep 17 00:00:00 2001
From: YuTengjing <ytj2713151713@gmail.com>
Date: Sat, 7 Mar 2026 23:26:57 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix:=20detect=20exceeded=20conte?=
 =?UTF-8?q?xt=20window=20errors=20from=20message=20text=20(#12788)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 locales/en-US/error.json                      |  2 +
 locales/zh-CN/error.json                      |  2 +
 .../core/anthropicCompatibleFactory/index.ts  | 38 +++++++++
 .../openaiCompatibleFactory/index.test.ts     | 78 +++++++++++++++++-
 .../src/core/openaiCompatibleFactory/index.ts | 23 ++++++
 .../src/utils/googleErrorParser.test.ts       | 25 ++++++
 .../src/utils/googleErrorParser.ts            | 10 +++
 .../isExceededContextWindowError.test.ts      | 81 +++++++++++++++++++
 .../src/utils/isExceededContextWindowError.ts | 19 +++++
 .../src/utils/isQuotaLimitError.test.ts       | 46 +++++++++++
 .../src/utils/isQuotaLimitError.ts            | 14 ++++
 src/features/Conversation/Error/index.tsx     |  4 -
 src/locales/default/error.ts                  |  4 +
 13 files changed, 340 insertions(+), 6 deletions(-)
 create mode 100644 packages/model-runtime/src/utils/isExceededContextWindowError.test.ts
 create mode 100644 packages/model-runtime/src/utils/isExceededContextWindowError.ts
 create mode 100644 packages/model-runtime/src/utils/isQuotaLimitError.test.ts
 create mode 100644 packages/model-runtime/src/utils/isQuotaLimitError.ts

diff --git a/locales/en-US/error.json b/locales/en-US/error.json
index b6807839c5..1758eb2488 100644
--- a/locales/en-US/error.json
+++ b/locales/en-US/error.json
@@ -67,6 +67,7 @@
   "response.ConnectionCheckFailed": "The request returned empty. Please check if the API proxy address does not end with `/v1`.",
   "response.CreateMessageError": "Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.",
   "response.ExceededContextWindow": "The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.",
+  "response.ExceededContextWindowCloud": "The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.",
   "response.FreePlanLimit": "You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.",
   "response.GoogleAIBlockReason.BLOCKLIST": "Your content contains prohibited terms. Please review and modify your input, then try again.",
   "response.GoogleAIBlockReason.IMAGE_SAFETY": "The generated image was blocked for safety reasons. Please try modifying your image request.",
@@ -106,6 +107,7 @@
   "response.PluginSettingsInvalid": "This skill needs to be correctly configured before it can be used. Please check if your configuration is correct",
   "response.ProviderBizError": "Error requesting {{provider}} service, please troubleshoot or retry based on the following information",
   "response.QuotaLimitReached": "Sorry, the token usage or request count has reached the quota limit for this key. Please increase the key's quota or try again later.",
+  "response.QuotaLimitReachedCloud": "The model service is currently under heavy load. Please try again later.",
   "response.ServerAgentRuntimeError": "Sorry, the Agent service is currently unavailable. Please try again later or contact us via email for support.",
   "response.StreamChunkError": "Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.",
   "response.SubscriptionKeyMismatch": "We apologize for the inconvenience. Due to a temporary system malfunction, your current subscription usage is inactive. Please click the button below to restore your subscription, or contact us via email for support.",
diff --git a/locales/zh-CN/error.json b/locales/zh-CN/error.json
index d470cb3c23..6f5c46656d 100644
--- a/locales/zh-CN/error.json
+++ b/locales/zh-CN/error.json
@@ -67,6 +67,7 @@
   "response.ConnectionCheckFailed": "测试返回为空。请确认 API 代理地址末尾未包含 `/v1`",
   "response.CreateMessageError": "消息未能发送。建议先复制内容再重试；刷新页面后该消息不会保留",
   "response.ExceededContextWindow": "上下文长度超出模型限制。请减少内容量后重试",
+  "response.ExceededContextWindowCloud": "当前对话内容过长，无法继续处理。请编辑最后一条消息以减少输入内容，或删除部分消息后重试",
   "response.FreePlanLimit": "当前计划不支持该功能。请升级到付费计划后继续",
   "response.GoogleAIBlockReason.BLOCKLIST": "内容包含被禁止的词汇。请修改后重试",
   "response.GoogleAIBlockReason.IMAGE_SAFETY": "图像生成请求因安全策略被阻止。请调整描述后重试",
@@ -106,6 +107,7 @@
   "response.PluginSettingsInvalid": "该技能需要完成配置后才能使用，请检查技能配置",
   "response.ProviderBizError": "模型服务商返回错误。请根据以下信息排查，或稍后重试",
   "response.QuotaLimitReached": "Token 用量或请求次数已达配额上限。请提升配额或稍后再试",
+  "response.QuotaLimitReachedCloud": "当前模型服务负载较高，请稍后重试",
   "response.ServerAgentRuntimeError": "助理运行服务暂不可用。请稍后再试，或邮件联系我们",
   "response.StreamChunkError": "流式响应解析失败。请检查接口是否符合规范，或联系模型服务商",
   "response.SubscriptionKeyMismatch": "订阅状态同步异常。你可以点击下方按钮恢复订阅，或邮件联系我们",
diff --git a/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts b/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts
index e676b6c7f4..c314c72aec 100644
--- a/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts
+++ b/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts
@@ -19,6 +19,8 @@ import { AgentRuntimeError } from '../../utils/createError';
 import { debugStream } from '../../utils/debugStream';
 import { desensitizeUrl } from '../../utils/desensitizeUrl';
 import { getModelPricing } from '../../utils/getModelPricing';
+import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError';
+import { isQuotaLimitError } from '../../utils/isQuotaLimitError';
 import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
 import { StreamingResponse } from '../../utils/response';
 import type { LobeRuntimeAI } from '../BaseAI';
@@ -283,6 +285,23 @@ export const handleDefaultAnthropicError = <T extends Record<string, any> = any>
 
   const { errorResult } = handleAnthropicError(error);
 
+  const errorMsg = errorResult.message || errorResult.error?.message;
+  if (isExceededContextWindowError(errorMsg)) {
+    return {
+      endpoint: desensitizedEndpoint,
+      error: errorResult,
+      errorType: AgentRuntimeErrorType.ExceededContextWindow,
+    };
+  }
+
+  if (isQuotaLimitError(errorMsg)) {
+    return {
+      endpoint: desensitizedEndpoint,
+      error: errorResult,
+      errorType: AgentRuntimeErrorType.QuotaLimitReached,
+    };
+  }
+
   return {
     endpoint: desensitizedEndpoint,
     error: errorResult,
@@ -660,6 +679,25 @@ export const createAnthropicCompatibleRuntime = <T extends Record<string, any> =
         return { headers: error?.headers, stack: error?.stack, status: error?.status };
       })();
 
+      const errorMsg = errorResult.message || errorResult.error?.message;
+      if (isExceededContextWindowError(errorMsg)) {
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.ExceededContextWindow,
+          provider: this.id,
+        });
+      }
+
+      if (isQuotaLimitError(errorMsg)) {
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.QuotaLimitReached,
+          provider: this.id,
+        });
+      }
+
       return AgentRuntimeError.chat({
         endpoint: desensitizedEndpoint,
         error: errorResult,
diff --git a/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts b/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts
index 4dabd0ad48..e8c9125651 100644
--- a/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts
+++ b/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts
@@ -351,7 +351,7 @@ describe('LobeOpenAICompatibleFactory', () => {
           'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10}\n\n',
           'id: output_speed\n',
           'event: speed\n',
-          expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time
+          expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time
           'id: a\n',
           'event: stop\n',
           'data: "stop"\n\n',
@@ -427,7 +427,7 @@ describe('LobeOpenAICompatibleFactory', () => {
           'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10,"cost":0.000005}\n\n',
           'id: output_speed\n',
           'event: speed\n',
-          expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time
+          expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time
           'id: a\n',
           'event: stop\n',
           'data: "stop"\n\n',
@@ -789,6 +789,80 @@ describe('LobeOpenAICompatibleFactory', () => {
         }
       });
 
+      it('should detect ExceededContextWindow from error message text', async () => {
+        const apiError = new OpenAI.APIError(
+          400,
+          {
+            error: {
+              message:
+                "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+            },
+            status: 400,
+          },
+          'Error message',
+          {},
+        );
+
+        vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+        // Assert on the rejection itself: with try/catch the test would pass
+        // silently if `chat` resolved instead of throwing.
+        await expect(
+          instance.chat({
+            messages: [{ content: 'Hello', role: 'user' }],
+            model: 'mistralai/mistral-7b-instruct:free',
+            temperature: 0,
+          }),
+        ).rejects.toEqual({
+          endpoint: defaultBaseURL,
+          error: {
+            error: {
+              message:
+                "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+            },
+            status: 400,
+          },
+          errorType: AgentRuntimeErrorType.ExceededContextWindow,
+          provider,
+        });
+      });
+
+      it('should detect QuotaLimitReached from error message text', async () => {
+        const apiError = new OpenAI.APIError(
+          429,
+          {
+            error: {
+              message: 'Resource has been exhausted (e.g. check quota).',
+            },
+            status: 429,
+          },
+          'Error message',
+          {},
+        );
+
+        vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+        // Assert on the rejection itself: with try/catch the test would pass
+        // silently if `chat` resolved instead of throwing.
+        await expect(
+          instance.chat({
+            messages: [{ content: 'Hello', role: 'user' }],
+            model: 'mistralai/mistral-7b-instruct:free',
+            temperature: 0,
+          }),
+        ).rejects.toEqual({
+          endpoint: defaultBaseURL,
+          error: {
+            error: {
+              message: 'Resource has been exhausted (e.g. check quota).',
+            },
+            status: 429,
+          },
+          errorType: AgentRuntimeErrorType.QuotaLimitReached,
+          provider,
+        });
+      });
+
       it('should return AgentRuntimeError for non-OpenAI errors', async () => {
         // Arrange
         const genericError = new Error('Generic Error');
diff --git a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts
index 2834fad666..240e3eba61 100644
--- a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts
+++ b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts
@@ -38,6 +38,8 @@ import { desensitizeUrl } from '../../utils/desensitizeUrl';
 import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
 import { getModelPricing } from '../../utils/getModelPricing';
 import { handleOpenAIError } from '../../utils/handleOpenAIError';
+import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError';
+import { isQuotaLimitError } from '../../utils/isQuotaLimitError';
 import { postProcessModelList } from '../../utils/postProcessModelList';
 import { StreamingResponse } from '../../utils/response';
 import type { LobeRuntimeAI } from '../BaseAI';
@@ -900,6 +902,27 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
         }
       }
 
+      const errorMsg = errorResult.error?.message || errorResult.message;
+      if (isExceededContextWindowError(errorMsg)) {
+        log('context length exceeded detected from message');
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.ExceededContextWindow,
+          provider: this.id,
+        });
+      }
+
+      if (isQuotaLimitError(errorMsg)) {
+        log('quota limit reached detected from message');
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.QuotaLimitReached,
+          provider: this.id,
+        });
+      }
+
       log('returning generic error');
       return AgentRuntimeError.chat({
         endpoint: desensitizedEndpoint,
diff --git a/packages/model-runtime/src/utils/googleErrorParser.test.ts b/packages/model-runtime/src/utils/googleErrorParser.test.ts
index 14c0681d5d..3e7de10d7b 100644
--- a/packages/model-runtime/src/utils/googleErrorParser.test.ts
+++ b/packages/model-runtime/src/utils/googleErrorParser.test.ts
@@ -320,6 +320,31 @@ describe('googleErrorParser', () => {
       );
     });
 
+    it('should detect exceeded context window from message text', () => {
+      const input =
+        'The input token count exceeds the maximum number of tokens allowed for this model';
+      const result = parseGoogleErrorMessage(input);
+
+      expect(result.errorType).toBe(AgentRuntimeErrorType.ExceededContextWindow);
+      expect(result.error.message).toBe(input);
+    });
+
+    it('should detect quota limit from "resource has been exhausted" message', () => {
+      const input = 'Resource has been exhausted (e.g. check quota).';
+      const result = parseGoogleErrorMessage(input);
+
+      expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached);
+      expect(result.error.message).toBe(input);
+    });
+
+    it('should detect quota limit from "too many requests" message', () => {
+      const input = 'Too many requests, please try again later';
+      const result = parseGoogleErrorMessage(input);
+
+      expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached);
+      expect(result.error.message).toBe(input);
+    });
+
     it('should return default error for unparseable messages', () => {
       const input = 'Some random error message that cannot be parsed';
       const result = parseGoogleErrorMessage(input);
diff --git a/packages/model-runtime/src/utils/googleErrorParser.ts b/packages/model-runtime/src/utils/googleErrorParser.ts
index 531dc762a1..e552e894e5 100644
--- a/packages/model-runtime/src/utils/googleErrorParser.ts
+++ b/packages/model-runtime/src/utils/googleErrorParser.ts
@@ -1,5 +1,7 @@
 import type { ILobeAgentRuntimeErrorType } from '../types/error';
 import { AgentRuntimeErrorType } from '../types/error';
+import { isExceededContextWindowError } from './isExceededContextWindowError';
+import { isQuotaLimitError } from './isQuotaLimitError';
 
 export interface ParsedError {
   error: any;
@@ -110,6 +112,14 @@ export function parseGoogleErrorMessage(message: string): ParsedError {
     return { error: { message }, errorType: AgentRuntimeErrorType.ProviderNoImageGenerated };
   }
 
+  if (isExceededContextWindowError(message)) {
+    return { error: { message }, errorType: AgentRuntimeErrorType.ExceededContextWindow };
+  }
+
+  if (isQuotaLimitError(message)) {
+    return { error: { message }, errorType: AgentRuntimeErrorType.QuotaLimitReached };
+  }
+
   // Unified error type determination function
   const getErrorType = (code: number | null, message: string): ILobeAgentRuntimeErrorType => {
     if (code === 400 && message.includes('API key not valid')) {
diff --git a/packages/model-runtime/src/utils/isExceededContextWindowError.test.ts b/packages/model-runtime/src/utils/isExceededContextWindowError.test.ts
new file mode 100644
index 0000000000..57673d92f4
--- /dev/null
+++ b/packages/model-runtime/src/utils/isExceededContextWindowError.test.ts
@@ -0,0 +1,67 @@
+import { describe, expect, it } from 'vitest';
+
+import { isExceededContextWindowError } from './isExceededContextWindowError';
+
+// Each entry pairs a test title with a provider message that must be classified
+// as an exceeded-context-window error.
+const matchingCases: [string, string][] = [
+  [
+    'should detect OpenAI/DeepSeek "maximum context length" errors',
+    "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+  ],
+  ['should detect OpenAI "context length exceeded" errors', 'context length exceeded'],
+  [
+    'should detect OpenAI "context_length_exceeded" code in message',
+    'Error code: context_length_exceeded',
+  ],
+  [
+    'should detect MiniMax "context window exceeds" errors',
+    'invalid params, context window exceeds limit (2013)',
+  ],
+  [
+    'should detect Aihubmix "exceeds the context window" errors',
+    'This request exceeds the context window of this model',
+  ],
+  [
+    'should detect Anthropic "prompt is too long" errors',
+    'prompt is too long: 231426 tokens > 200000 maximum',
+  ],
+  ['should detect Anthropic "input is too long" errors', 'input is too long for this model'],
+  ['should detect Bedrock "too many input tokens" errors', 'too many input tokens'],
+  [
+    'should detect Google "exceeds the maximum number of tokens" errors',
+    'The input token count exceeds the maximum number of tokens allowed',
+  ],
+  [
+    'should detect "maximum allowed number of input tokens" errors',
+    'maximum allowed number of input tokens is 128000',
+  ],
+  ['should detect "request too large for model" errors', 'request too large for model'],
+];
+
+describe('isExceededContextWindowError', () => {
+  it('should return false for undefined/empty input', () => {
+    for (const input of [undefined, '']) {
+      expect(isExceededContextWindowError(input)).toBe(false);
+    }
+  });
+
+  // Register one test per table entry, in table order.
+  for (const [title, message] of matchingCases) {
+    it(title, () => {
+      expect(isExceededContextWindowError(message)).toBe(true);
+    });
+  }
+
+  it('should be case-insensitive', () => {
+    for (const input of ['MAXIMUM CONTEXT LENGTH exceeded', 'Prompt Is Too Long']) {
+      expect(isExceededContextWindowError(input)).toBe(true);
+    }
+  });
+
+  it('should return false for unrelated error messages', () => {
+    for (const input of ['Invalid API key', 'Rate limit exceeded', 'Internal server error']) {
+      expect(isExceededContextWindowError(input)).toBe(false);
+    }
+  });
+});
diff --git a/packages/model-runtime/src/utils/isExceededContextWindowError.ts b/packages/model-runtime/src/utils/isExceededContextWindowError.ts
new file mode 100644
index 0000000000..72045914cb
--- /dev/null
+++ b/packages/model-runtime/src/utils/isExceededContextWindowError.ts
@@ -0,0 +1,36 @@
+/**
+ * Lower-cased substrings that providers are known to include in the error
+ * message when a request does not fit into the model's context window.
+ */
+const CONTEXT_WINDOW_PATTERNS: readonly string[] = [
+  'maximum context length', // OpenAI/DeepSeek
+  'context length exceeded', // OpenAI
+  'context_length_exceeded', // OpenAI (code in message)
+  'context window exceeds', // MiniMax non-streaming
+  'exceeds the context window', // Aihubmix / generic
+  'prompt is too long', // Anthropic
+  'input is too long', // Anthropic
+  'too many input tokens', // Bedrock
+  'exceeds the maximum number of tokens', // Google
+  'maximum allowed number of input tokens',
+  'request too large for model',
+];
+
+/**
+ * Checks whether a provider error message reports an exceeded context window.
+ *
+ * Matching is a case-insensitive substring search against
+ * {@link CONTEXT_WINDOW_PATTERNS}.
+ *
+ * @param message - Raw error message taken from the provider response.
+ * @returns `true` when any known pattern occurs in `message`; `false` for
+ *   missing, empty, or non-string input.
+ */
+export const isExceededContextWindowError = (message?: string): boolean => {
+  // Callers read `message` off provider error payloads, so at runtime it may
+  // not be a string; this runs inside error handling and must never throw.
+  if (typeof message !== 'string' || message.length === 0) return false;
+
+  const lower = message.toLowerCase();
+  return CONTEXT_WINDOW_PATTERNS.some((pattern) => lower.includes(pattern));
+};
diff --git a/packages/model-runtime/src/utils/isQuotaLimitError.test.ts b/packages/model-runtime/src/utils/isQuotaLimitError.test.ts
new file mode 100644
index 0000000000..d8342b0e15
--- /dev/null
+++ b/packages/model-runtime/src/utils/isQuotaLimitError.test.ts
@@ -0,0 +1,50 @@
+import { describe, expect, it } from 'vitest';
+
+import { isQuotaLimitError } from './isQuotaLimitError';
+
+// Each entry pairs a test title with a provider message that must be classified
+// as a quota/rate-limit error.
+const matchingCases: [string, string][] = [
+  ['should detect Google "resource exhausted" errors', 'Resource exhausted'],
+  [
+    'should detect Google "resource has been exhausted" errors',
+    'Resource has been exhausted (e.g. check quota).',
+  ],
+  [
+    'should detect OpenAI "rate limit reached" errors',
+    'Rate limit reached for model gpt-4 in organization',
+  ],
+  [
+    'should detect OpenAI "rate_limit_exceeded" code in message',
+    'Error code: rate_limit_exceeded',
+  ],
+  ['should detect "quota exceeded" errors', 'Quota exceeded for this API key'],
+  ['should detect "too many requests" errors', 'Too many requests, please slow down'],
+];
+
+describe('isQuotaLimitError', () => {
+  it('should return false for undefined/empty input', () => {
+    for (const input of [undefined, '']) {
+      expect(isQuotaLimitError(input)).toBe(false);
+    }
+  });
+
+  // Register one test per table entry, in table order.
+  for (const [title, message] of matchingCases) {
+    it(title, () => {
+      expect(isQuotaLimitError(message)).toBe(true);
+    });
+  }
+
+  it('should be case-insensitive', () => {
+    for (const input of ['RESOURCE EXHAUSTED', 'Rate Limit Reached', 'TOO MANY REQUESTS']) {
+      expect(isQuotaLimitError(input)).toBe(true);
+    }
+  });
+
+  it('should return false for unrelated error messages', () => {
+    for (const input of ['Invalid API key', 'Context length exceeded', 'Internal server error']) {
+      expect(isQuotaLimitError(input)).toBe(false);
+    }
+  });
+});
diff --git a/packages/model-runtime/src/utils/isQuotaLimitError.ts b/packages/model-runtime/src/utils/isQuotaLimitError.ts
new file mode 100644
index 0000000000..d883b27fb9
--- /dev/null
+++ b/packages/model-runtime/src/utils/isQuotaLimitError.ts
@@ -0,0 +1,31 @@
+/**
+ * Lower-cased substrings that providers are known to include in the error
+ * message when a quota or rate limit has been hit.
+ */
+const QUOTA_LIMIT_PATTERNS: readonly string[] = [
+  'resource exhausted', // Google / VertexAI
+  'resource has been exhausted', // Google
+  'rate limit reached', // OpenAI
+  'rate_limit_exceeded', // OpenAI (code in message)
+  'quota exceeded', // generic
+  'too many requests', // generic
+];
+
+/**
+ * Checks whether a provider error message reports a quota or rate limit.
+ *
+ * Matching is a case-insensitive substring search against
+ * {@link QUOTA_LIMIT_PATTERNS}.
+ *
+ * @param message - Raw error message taken from the provider response.
+ * @returns `true` when any known pattern occurs in `message`; `false` for
+ *   missing, empty, or non-string input.
+ */
+export const isQuotaLimitError = (message?: string): boolean => {
+  // Callers read `message` off provider error payloads, so at runtime it may
+  // not be a string; this runs inside error handling and must never throw.
+  if (typeof message !== 'string' || message.length === 0) return false;
+
+  const lower = message.toLowerCase();
+  return QUOTA_LIMIT_PATTERNS.some((pattern) => lower.includes(pattern));
+};
diff --git a/src/features/Conversation/Error/index.tsx b/src/features/Conversation/Error/index.tsx
index 2d503c003e..62c3c73180 100644
--- a/src/features/Conversation/Error/index.tsx
+++ b/src/features/Conversation/Error/index.tsx
@@ -55,10 +55,6 @@ const getErrorAlertConfig = (
       type: 'secondary',
     };
 
-  /* ↓ cloud slot ↓ */
-
-  /* ↑ cloud slot ↑ */
-
   switch (errorType) {
     case ChatErrorType.SystemTimeNotMatchError:
     case AgentRuntimeErrorType.PermissionDenied:
diff --git a/src/locales/default/error.ts b/src/locales/default/error.ts
index fc51cfef6f..93b45ffa19 100644
--- a/src/locales/default/error.ts
+++ b/src/locales/default/error.ts
@@ -106,6 +106,10 @@ export default {
     'Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.',
   'response.ExceededContextWindow':
     'The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.',
+  'response.ExceededContextWindowCloud':
+    'The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.',
+  'response.QuotaLimitReachedCloud':
+    'The model service is currently under heavy load. Please try again later.',
   'response.FreePlanLimit':
     'You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.',
   'response.GoogleAIBlockReason.BLOCKLIST':