From b91fa68b3130271d55363de4e0cdfbe3363af724 Mon Sep 17 00:00:00 2001 From: YuTengjing Date: Sat, 7 Mar 2026 23:26:57 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix:=20detect=20exceeded=20conte?= =?UTF-8?q?xt=20window=20errors=20from=20message=20text=20(#12788)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- locales/en-US/error.json | 2 + locales/zh-CN/error.json | 2 + .../core/anthropicCompatibleFactory/index.ts | 38 +++++++++ .../openaiCompatibleFactory/index.test.ts | 78 +++++++++++++++++- .../src/core/openaiCompatibleFactory/index.ts | 23 ++++++ .../src/utils/googleErrorParser.test.ts | 25 ++++++ .../src/utils/googleErrorParser.ts | 10 +++ .../isExceededContextWindowError.test.ts | 81 +++++++++++++++++++ .../src/utils/isExceededContextWindowError.ts | 19 +++++ .../src/utils/isQuotaLimitError.test.ts | 46 +++++++++++ .../src/utils/isQuotaLimitError.ts | 14 ++++ src/features/Conversation/Error/index.tsx | 4 - src/locales/default/error.ts | 4 + 13 files changed, 340 insertions(+), 6 deletions(-) create mode 100644 packages/model-runtime/src/utils/isExceededContextWindowError.test.ts create mode 100644 packages/model-runtime/src/utils/isExceededContextWindowError.ts create mode 100644 packages/model-runtime/src/utils/isQuotaLimitError.test.ts create mode 100644 packages/model-runtime/src/utils/isQuotaLimitError.ts diff --git a/locales/en-US/error.json b/locales/en-US/error.json index b6807839c5..1758eb2488 100644 --- a/locales/en-US/error.json +++ b/locales/en-US/error.json @@ -67,6 +67,7 @@ "response.ConnectionCheckFailed": "The request returned empty. Please check if the API proxy address does not end with `/v1`.", "response.CreateMessageError": "Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.", "response.ExceededContextWindow": "The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.", + "response.ExceededContextWindowCloud": "The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.", "response.FreePlanLimit": "You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.", "response.GoogleAIBlockReason.BLOCKLIST": "Your content contains prohibited terms. Please review and modify your input, then try again.", "response.GoogleAIBlockReason.IMAGE_SAFETY": "The generated image was blocked for safety reasons. Please try modifying your image request.", @@ -106,6 +107,7 @@ "response.PluginSettingsInvalid": "This skill needs to be correctly configured before it can be used. Please check if your configuration is correct", "response.ProviderBizError": "Error requesting {{provider}} service, please troubleshoot or retry based on the following information", "response.QuotaLimitReached": "Sorry, the token usage or request count has reached the quota limit for this key. Please increase the key's quota or try again later.", + "response.QuotaLimitReachedCloud": "The model service is currently under heavy load. Please try again later.", "response.ServerAgentRuntimeError": "Sorry, the Agent service is currently unavailable. Please try again later or contact us via email for support.", "response.StreamChunkError": "Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.", "response.SubscriptionKeyMismatch": "We apologize for the inconvenience. Due to a temporary system malfunction, your current subscription usage is inactive. Please click the button below to restore your subscription, or contact us via email for support.", diff --git a/locales/zh-CN/error.json b/locales/zh-CN/error.json index d470cb3c23..6f5c46656d 100644 --- a/locales/zh-CN/error.json +++ b/locales/zh-CN/error.json @@ -67,6 +67,7 @@ "response.ConnectionCheckFailed": "测试返回为空。请确认 API 代理地址末尾未包含 `/v1`", "response.CreateMessageError": "消息未能发送。建议先复制内容再重试;刷新页面后该消息不会保留", "response.ExceededContextWindow": "上下文长度超出模型限制。请减少内容量后重试", + "response.ExceededContextWindowCloud": "当前对话内容过长,无法继续处理。请编辑最后一条消息减少输入内容或者删除一些消息重试。", "response.FreePlanLimit": "当前计划不支持该功能。请升级到付费计划后继续", "response.GoogleAIBlockReason.BLOCKLIST": "内容包含被禁止的词汇。请修改后重试", "response.GoogleAIBlockReason.IMAGE_SAFETY": "图像生成请求因安全策略被阻止。请调整描述后重试", @@ -106,6 +107,7 @@ "response.PluginSettingsInvalid": "该技能需要完成配置后才能使用,请检查技能配置", "response.ProviderBizError": "模型服务商返回错误。请根据以下信息排查,或稍后重试", "response.QuotaLimitReached": "Token 用量或请求次数已达配额上限。请提升配额或稍后再试", + "response.QuotaLimitReachedCloud": "当前模型服务负载较高,请稍后重试。", "response.ServerAgentRuntimeError": "助理运行服务暂不可用。请稍后再试,或邮件联系我们", "response.StreamChunkError": "流式响应解析失败。请检查接口是否符合规范,或联系模型服务商", "response.SubscriptionKeyMismatch": "订阅状态同步异常。你可以点击下方按钮恢复订阅,或邮件联系我们", diff --git a/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts b/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts index e676b6c7f4..c314c72aec 100644 --- a/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts +++ b/packages/model-runtime/src/core/anthropicCompatibleFactory/index.ts @@ -19,6 +19,8 @@ import { AgentRuntimeError } from '../../utils/createError'; import { debugStream } from '../../utils/debugStream'; import { desensitizeUrl } from '../../utils/desensitizeUrl'; import { getModelPricing } from '../../utils/getModelPricing'; +import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError'; +import { isQuotaLimitError } from '../../utils/isQuotaLimitError'; import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse'; import { StreamingResponse } from '../../utils/response'; import type { LobeRuntimeAI } from '../BaseAI'; @@ -283,6 +285,23 @@ export const handleDefaultAnthropicError = = any> const { errorResult } = handleAnthropicError(error); + const errorMsg = errorResult.message || errorResult.error?.message; + if (isExceededContextWindowError(errorMsg)) { + return { + endpoint: desensitizedEndpoint, + error: errorResult, + errorType: AgentRuntimeErrorType.ExceededContextWindow, + }; + } + + if (isQuotaLimitError(errorMsg)) { + return { + endpoint: desensitizedEndpoint, + error: errorResult, + errorType: AgentRuntimeErrorType.QuotaLimitReached, + }; + } + return { endpoint: desensitizedEndpoint, error: errorResult, @@ -660,6 +679,25 @@ export const createAnthropicCompatibleRuntime = = return { headers: error?.headers, stack: error?.stack, status: error?.status }; })(); + const errorMsg = errorResult.message || errorResult.error?.message; + if (isExceededContextWindowError(errorMsg)) { + return AgentRuntimeError.chat({ + endpoint: desensitizedEndpoint, + error: errorResult, + errorType: AgentRuntimeErrorType.ExceededContextWindow, + provider: this.id, + }); + } + + if (isQuotaLimitError(errorMsg)) { + return AgentRuntimeError.chat({ + endpoint: desensitizedEndpoint, + error: errorResult, + errorType: AgentRuntimeErrorType.QuotaLimitReached, + provider: this.id, + }); + } + return AgentRuntimeError.chat({ endpoint: desensitizedEndpoint, error: errorResult, diff --git a/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts b/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts index 4dabd0ad48..e8c9125651 100644 --- a/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts +++ b/packages/model-runtime/src/core/openaiCompatibleFactory/index.test.ts @@ -351,7 +351,7 @@ describe('LobeOpenAICompatibleFactory', () => { 'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10}\n\n', 'id: output_speed\n', 'event: speed\n', - expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time + expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time 'id: a\n', 'event: stop\n', 'data: "stop"\n\n', @@ -427,7 +427,7 @@ describe('LobeOpenAICompatibleFactory', () => { 'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10,"cost":0.000005}\n\n', 'id: output_speed\n', 'event: speed\n', - expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time + expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time 'id: a\n', 'event: stop\n', 'data: "stop"\n\n', @@ -789,6 +789,80 @@ describe('LobeOpenAICompatibleFactory', () => { } }); + it('should detect ExceededContextWindow from error message text', async () => { + const apiError = new OpenAI.APIError( + 400, + { + error: { + message: + "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.", + }, + status: 400, + }, + 'Error message', + {}, + ); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: { + error: { + message: + "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.", + }, + status: 400, + }, + errorType: AgentRuntimeErrorType.ExceededContextWindow, + provider, + }); + } + }); + + it('should detect QuotaLimitReached from error message text', async () => { + const apiError = new OpenAI.APIError( + 429, + { + error: { + message: 'Resource has been exhausted (e.g. check quota).', + }, + status: 429, + }, + 'Error message', + {}, + ); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: { + error: { + message: 'Resource has been exhausted (e.g. check quota).', + }, + status: 429, + }, + errorType: AgentRuntimeErrorType.QuotaLimitReached, + provider, + }); + } + }); + it('should return AgentRuntimeError for non-OpenAI errors', async () => { // Arrange const genericError = new Error('Generic Error'); diff --git a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts index 2834fad666..240e3eba61 100644 --- a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts +++ b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts @@ -38,6 +38,8 @@ import { desensitizeUrl } from '../../utils/desensitizeUrl'; import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty'; import { getModelPricing } from '../../utils/getModelPricing'; import { handleOpenAIError } from '../../utils/handleOpenAIError'; +import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError'; +import { isQuotaLimitError } from '../../utils/isQuotaLimitError'; import { postProcessModelList } from '../../utils/postProcessModelList'; import { StreamingResponse } from '../../utils/response'; import type { LobeRuntimeAI } from '../BaseAI'; @@ -900,6 +902,27 @@ export const createOpenAICompatibleRuntime = = an } } + const errorMsg = errorResult.error?.message || errorResult.message; + if (isExceededContextWindowError(errorMsg)) { + log('context length exceeded detected from message'); + return AgentRuntimeError.chat({ + endpoint: desensitizedEndpoint, + error: errorResult, + errorType: AgentRuntimeErrorType.ExceededContextWindow, + provider: this.id, + }); + } + + if (isQuotaLimitError(errorMsg)) { + log('quota limit reached detected from message'); + return AgentRuntimeError.chat({ + endpoint: desensitizedEndpoint, + error: errorResult, + errorType: AgentRuntimeErrorType.QuotaLimitReached, + provider: this.id, + }); + } + log('returning generic error'); return AgentRuntimeError.chat({ endpoint: desensitizedEndpoint, diff --git a/packages/model-runtime/src/utils/googleErrorParser.test.ts b/packages/model-runtime/src/utils/googleErrorParser.test.ts index 14c0681d5d..3e7de10d7b 100644 --- a/packages/model-runtime/src/utils/googleErrorParser.test.ts +++ b/packages/model-runtime/src/utils/googleErrorParser.test.ts @@ -320,6 +320,31 @@ describe('googleErrorParser', () => { ); }); + it('should detect exceeded context window from message text', () => { + const input = + 'The input token count exceeds the maximum number of tokens allowed for this model'; + const result = parseGoogleErrorMessage(input); + + expect(result.errorType).toBe(AgentRuntimeErrorType.ExceededContextWindow); + expect(result.error.message).toBe(input); + }); + + it('should detect quota limit from "resource has been exhausted" message', () => { + const input = 'Resource has been exhausted (e.g. check quota).'; + const result = parseGoogleErrorMessage(input); + + expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached); + expect(result.error.message).toBe(input); + }); + + it('should detect quota limit from "too many requests" message', () => { + const input = 'Too many requests, please try again later'; + const result = parseGoogleErrorMessage(input); + + expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached); + expect(result.error.message).toBe(input); + }); + it('should return default error for unparseable messages', () => { const input = 'Some random error message that cannot be parsed'; const result = parseGoogleErrorMessage(input); diff --git a/packages/model-runtime/src/utils/googleErrorParser.ts b/packages/model-runtime/src/utils/googleErrorParser.ts index 531dc762a1..e552e894e5 100644 --- a/packages/model-runtime/src/utils/googleErrorParser.ts +++ b/packages/model-runtime/src/utils/googleErrorParser.ts @@ -1,5 +1,7 @@ import type { ILobeAgentRuntimeErrorType } from '../types/error'; import { AgentRuntimeErrorType } from '../types/error'; +import { isExceededContextWindowError } from './isExceededContextWindowError'; +import { isQuotaLimitError } from './isQuotaLimitError'; export interface ParsedError { error: any; @@ -110,6 +112,14 @@ export function parseGoogleErrorMessage(message: string): ParsedError { return { error: { message }, errorType: AgentRuntimeErrorType.ProviderNoImageGenerated }; } + if (isExceededContextWindowError(message)) { + return { error: { message }, errorType: AgentRuntimeErrorType.ExceededContextWindow }; + } + + if (isQuotaLimitError(message)) { + return { error: { message }, errorType: AgentRuntimeErrorType.QuotaLimitReached }; + } + // Unified error type determination function const getErrorType = (code: number | null, message: string): ILobeAgentRuntimeErrorType => { if (code === 400 && message.includes('API key not valid')) { diff --git a/packages/model-runtime/src/utils/isExceededContextWindowError.test.ts b/packages/model-runtime/src/utils/isExceededContextWindowError.test.ts new file mode 100644 index 0000000000..57673d92f4 --- /dev/null +++ b/packages/model-runtime/src/utils/isExceededContextWindowError.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, it } from 'vitest'; + +import { isExceededContextWindowError } from './isExceededContextWindowError'; + +describe('isExceededContextWindowError', () => { + it('should return false for undefined/empty input', () => { + expect(isExceededContextWindowError(undefined)).toBe(false); + expect(isExceededContextWindowError('')).toBe(false); + }); + + it('should detect OpenAI/DeepSeek "maximum context length" errors', () => { + expect( + isExceededContextWindowError( + "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.", + ), + ).toBe(true); + }); + + it('should detect OpenAI "context length exceeded" errors', () => { + expect(isExceededContextWindowError('context length exceeded')).toBe(true); + }); + + it('should detect OpenAI "context_length_exceeded" code in message', () => { + expect(isExceededContextWindowError('Error code: context_length_exceeded')).toBe(true); + }); + + it('should detect MiniMax "context window exceeds" errors', () => { + expect( + isExceededContextWindowError('invalid params, context window exceeds limit (2013)'), + ).toBe(true); + }); + + it('should detect Aihubmix "exceeds the context window" errors', () => { + expect( + isExceededContextWindowError('This request exceeds the context window of this model'), + ).toBe(true); + }); + + it('should detect Anthropic "prompt is too long" errors', () => { + expect(isExceededContextWindowError('prompt is too long: 231426 tokens > 200000 maximum')).toBe( + true, + ); + }); + + it('should detect Anthropic "input is too long" errors', () => { + expect(isExceededContextWindowError('input is too long for this model')).toBe(true); + }); + + it('should detect Bedrock "too many input tokens" errors', () => { + expect(isExceededContextWindowError('too many input tokens')).toBe(true); + }); + + it('should detect Google "exceeds the maximum number of tokens" errors', () => { + expect( + isExceededContextWindowError( + 'The input token count exceeds the maximum number of tokens allowed', + ), + ).toBe(true); + }); + + it('should detect "maximum allowed number of input tokens" errors', () => { + expect(isExceededContextWindowError('maximum allowed number of input tokens is 128000')).toBe( + true, + ); + }); + + it('should detect "request too large for model" errors', () => { + expect(isExceededContextWindowError('request too large for model')).toBe(true); + }); + + it('should be case-insensitive', () => { + expect(isExceededContextWindowError('MAXIMUM CONTEXT LENGTH exceeded')).toBe(true); + expect(isExceededContextWindowError('Prompt Is Too Long')).toBe(true); + }); + + it('should return false for unrelated error messages', () => { + expect(isExceededContextWindowError('Invalid API key')).toBe(false); + expect(isExceededContextWindowError('Rate limit exceeded')).toBe(false); + expect(isExceededContextWindowError('Internal server error')).toBe(false); + }); +}); diff --git a/packages/model-runtime/src/utils/isExceededContextWindowError.ts b/packages/model-runtime/src/utils/isExceededContextWindowError.ts new file mode 100644 index 0000000000..72045914cb --- /dev/null +++ b/packages/model-runtime/src/utils/isExceededContextWindowError.ts @@ -0,0 +1,19 @@ +const CONTEXT_WINDOW_PATTERNS = [ + 'maximum context length', // OpenAI/DeepSeek + 'context length exceeded', // OpenAI + 'context_length_exceeded', // OpenAI (code in message) + 'context window exceeds', // MiniMax non-streaming + 'exceeds the context window', // Aihubmix / generic + 'prompt is too long', // Anthropic + 'input is too long', // Anthropic + 'too many input tokens', // Bedrock + 'exceeds the maximum number of tokens', // Google + 'maximum allowed number of input tokens', + 'request too large for model', +]; + +export const isExceededContextWindowError = (message?: string): boolean => { + if (!message) return false; + const lower = message.toLowerCase(); + return CONTEXT_WINDOW_PATTERNS.some((p) => lower.includes(p)); +}; diff --git a/packages/model-runtime/src/utils/isQuotaLimitError.test.ts b/packages/model-runtime/src/utils/isQuotaLimitError.test.ts new file mode 100644 index 0000000000..d8342b0e15 --- /dev/null +++ b/packages/model-runtime/src/utils/isQuotaLimitError.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from 'vitest'; + +import { isQuotaLimitError } from './isQuotaLimitError'; + +describe('isQuotaLimitError', () => { + it('should return false for undefined/empty input', () => { + expect(isQuotaLimitError(undefined)).toBe(false); + expect(isQuotaLimitError('')).toBe(false); + }); + + it('should detect Google "resource exhausted" errors', () => { + expect(isQuotaLimitError('Resource exhausted')).toBe(true); + }); + + it('should detect Google "resource has been exhausted" errors', () => { + expect(isQuotaLimitError('Resource has been exhausted (e.g. check quota).')).toBe(true); + }); + + it('should detect OpenAI "rate limit reached" errors', () => { + expect(isQuotaLimitError('Rate limit reached for model gpt-4 in organization')).toBe(true); + }); + + it('should detect OpenAI "rate_limit_exceeded" code in message', () => { + expect(isQuotaLimitError('Error code: rate_limit_exceeded')).toBe(true); + }); + + it('should detect "quota exceeded" errors', () => { + expect(isQuotaLimitError('Quota exceeded for this API key')).toBe(true); + }); + + it('should detect "too many requests" errors', () => { + expect(isQuotaLimitError('Too many requests, please slow down')).toBe(true); + }); + + it('should be case-insensitive', () => { + expect(isQuotaLimitError('RESOURCE EXHAUSTED')).toBe(true); + expect(isQuotaLimitError('Rate Limit Reached')).toBe(true); + expect(isQuotaLimitError('TOO MANY REQUESTS')).toBe(true); + }); + + it('should return false for unrelated error messages', () => { + expect(isQuotaLimitError('Invalid API key')).toBe(false); + expect(isQuotaLimitError('Context length exceeded')).toBe(false); + expect(isQuotaLimitError('Internal server error')).toBe(false); + }); +}); diff --git a/packages/model-runtime/src/utils/isQuotaLimitError.ts b/packages/model-runtime/src/utils/isQuotaLimitError.ts new file mode 100644 index 0000000000..d883b27fb9 --- /dev/null +++ b/packages/model-runtime/src/utils/isQuotaLimitError.ts @@ -0,0 +1,14 @@ +const QUOTA_LIMIT_PATTERNS = [ + 'resource exhausted', // Google / VertexAI + 'resource has been exhausted', // Google + 'rate limit reached', // OpenAI + 'rate_limit_exceeded', // OpenAI (code in message) + 'quota exceeded', // generic + 'too many requests', // generic +]; + +export const isQuotaLimitError = (message?: string): boolean => { + if (!message) return false; + const lower = message.toLowerCase(); + return QUOTA_LIMIT_PATTERNS.some((p) => lower.includes(p)); +}; diff --git a/src/features/Conversation/Error/index.tsx b/src/features/Conversation/Error/index.tsx index 2d503c003e..62c3c73180 100644 --- a/src/features/Conversation/Error/index.tsx +++ b/src/features/Conversation/Error/index.tsx @@ -55,10 +55,6 @@ const getErrorAlertConfig = ( type: 'secondary', }; - /* ↓ cloud slot ↓ */ - - /* ↑ cloud slot ↑ */ - switch (errorType) { case ChatErrorType.SystemTimeNotMatchError: case AgentRuntimeErrorType.PermissionDenied: diff --git a/src/locales/default/error.ts b/src/locales/default/error.ts index fc51cfef6f..93b45ffa19 100644 --- a/src/locales/default/error.ts +++ b/src/locales/default/error.ts @@ -106,6 +106,10 @@ export default { 'Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.', 'response.ExceededContextWindow': 'The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.', + 'response.ExceededContextWindowCloud': + 'The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.', + 'response.QuotaLimitReachedCloud': + 'The model service is currently under heavy load. Please try again later.', 'response.FreePlanLimit': 'You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.', 'response.GoogleAIBlockReason.BLOCKLIST':