mirror of https://github.com/lobehub/lobehub.git, synced 2026-03-27 13:29:15 +07:00
🐛 fix: detect exceeded context window errors from message text (#12788)
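In essence: two small substring matchers are added under packages/model-runtime/src/utils and wired into the Anthropic-compatible, OpenAI-compatible, and Google error handlers, so a provider error can be reclassified from its message text before falling back to the generic provider error. A rough sketch of that shared flow (illustrative only; `classify` is a hypothetical name for this sketch, while the helper and error-type names are the real ones from the diff below):

// Sketch of the shared detection flow. The real handlers also attach the
// desensitized endpoint and the provider id; see the runtime hunks below.
const classify = (errorMsg?: string) => {
  if (isExceededContextWindowError(errorMsg)) return AgentRuntimeErrorType.ExceededContextWindow;
  if (isQuotaLimitError(errorMsg)) return AgentRuntimeErrorType.QuotaLimitReached;
  return undefined; // fall through to the provider's generic error type
};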
@@ -67,6 +67,7 @@
   "response.ConnectionCheckFailed": "The request returned empty. Please check if the API proxy address does not end with `/v1`.",
   "response.CreateMessageError": "Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.",
   "response.ExceededContextWindow": "The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.",
+  "response.ExceededContextWindowCloud": "The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.",
   "response.FreePlanLimit": "You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.",
   "response.GoogleAIBlockReason.BLOCKLIST": "Your content contains prohibited terms. Please review and modify your input, then try again.",
   "response.GoogleAIBlockReason.IMAGE_SAFETY": "The generated image was blocked for safety reasons. Please try modifying your image request.",
@@ -106,6 +107,7 @@
   "response.PluginSettingsInvalid": "This skill needs to be correctly configured before it can be used. Please check if your configuration is correct",
   "response.ProviderBizError": "Error requesting {{provider}} service, please troubleshoot or retry based on the following information",
   "response.QuotaLimitReached": "Sorry, the token usage or request count has reached the quota limit for this key. Please increase the key's quota or try again later.",
+  "response.QuotaLimitReachedCloud": "The model service is currently under heavy load. Please try again later.",
   "response.ServerAgentRuntimeError": "Sorry, the Agent service is currently unavailable. Please try again later or contact us via email for support.",
   "response.StreamChunkError": "Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.",
   "response.SubscriptionKeyMismatch": "We apologize for the inconvenience. Due to a temporary system malfunction, your current subscription usage is inactive. Please click the button below to restore your subscription, or contact us via email for support.",
@@ -67,6 +67,7 @@
   "response.ConnectionCheckFailed": "测试返回为空。请确认 API 代理地址末尾未包含 `/v1`",
   "response.CreateMessageError": "消息未能发送。建议先复制内容再重试;刷新页面后该消息不会保留",
   "response.ExceededContextWindow": "上下文长度超出模型限制。请减少内容量后重试",
+  "response.ExceededContextWindowCloud": "当前对话内容过长,无法继续处理。请编辑最后一条消息减少输入内容或者删除一些消息重试。",
   "response.FreePlanLimit": "当前计划不支持该功能。请升级到付费计划后继续",
   "response.GoogleAIBlockReason.BLOCKLIST": "内容包含被禁止的词汇。请修改后重试",
   "response.GoogleAIBlockReason.IMAGE_SAFETY": "图像生成请求因安全策略被阻止。请调整描述后重试",
@@ -106,6 +107,7 @@
   "response.PluginSettingsInvalid": "该技能需要完成配置后才能使用,请检查技能配置",
   "response.ProviderBizError": "模型服务商返回错误。请根据以下信息排查,或稍后重试",
   "response.QuotaLimitReached": "Token 用量或请求次数已达配额上限。请提升配额或稍后再试",
+  "response.QuotaLimitReachedCloud": "当前模型服务负载较高,请稍后重试。",
   "response.ServerAgentRuntimeError": "助理运行服务暂不可用。请稍后再试,或邮件联系我们",
   "response.StreamChunkError": "流式响应解析失败。请检查接口是否符合规范,或联系模型服务商",
   "response.SubscriptionKeyMismatch": "订阅状态同步异常。你可以点击下方按钮恢复订阅,或邮件联系我们",
@@ -19,6 +19,8 @@ import { AgentRuntimeError } from '../../utils/createError';
 import { debugStream } from '../../utils/debugStream';
 import { desensitizeUrl } from '../../utils/desensitizeUrl';
 import { getModelPricing } from '../../utils/getModelPricing';
+import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError';
+import { isQuotaLimitError } from '../../utils/isQuotaLimitError';
 import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
 import { StreamingResponse } from '../../utils/response';
 import type { LobeRuntimeAI } from '../BaseAI';
@@ -283,6 +285,23 @@ export const handleDefaultAnthropicError = <T extends Record<string, any> = any>

   const { errorResult } = handleAnthropicError(error);

+  const errorMsg = errorResult.message || errorResult.error?.message;
+  if (isExceededContextWindowError(errorMsg)) {
+    return {
+      endpoint: desensitizedEndpoint,
+      error: errorResult,
+      errorType: AgentRuntimeErrorType.ExceededContextWindow,
+    };
+  }
+
+  if (isQuotaLimitError(errorMsg)) {
+    return {
+      endpoint: desensitizedEndpoint,
+      error: errorResult,
+      errorType: AgentRuntimeErrorType.QuotaLimitReached,
+    };
+  }
+
   return {
     endpoint: desensitizedEndpoint,
     error: errorResult,
@@ -660,6 +679,25 @@ export const createAnthropicCompatibleRuntime = <T extends Record<string, any> =
       return { headers: error?.headers, stack: error?.stack, status: error?.status };
     })();

+    const errorMsg = errorResult.message || errorResult.error?.message;
+    if (isExceededContextWindowError(errorMsg)) {
+      return AgentRuntimeError.chat({
+        endpoint: desensitizedEndpoint,
+        error: errorResult,
+        errorType: AgentRuntimeErrorType.ExceededContextWindow,
+        provider: this.id,
+      });
+    }
+
+    if (isQuotaLimitError(errorMsg)) {
+      return AgentRuntimeError.chat({
+        endpoint: desensitizedEndpoint,
+        error: errorResult,
+        errorType: AgentRuntimeErrorType.QuotaLimitReached,
+        provider: this.id,
+      });
+    }
+
     return AgentRuntimeError.chat({
       endpoint: desensitizedEndpoint,
       error: errorResult,
@@ -351,7 +351,7 @@ describe('LobeOpenAICompatibleFactory', () => {
         'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10}\n\n',
         'id: output_speed\n',
         'event: speed\n',
-        expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time
+        expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time
         'id: a\n',
         'event: stop\n',
         'data: "stop"\n\n',
@@ -427,7 +427,7 @@ describe('LobeOpenAICompatibleFactory', () => {
         'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10,"cost":0.000005}\n\n',
         'id: output_speed\n',
         'event: speed\n',
-        expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time
+        expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time
         'id: a\n',
         'event: stop\n',
         'data: "stop"\n\n',
@@ -789,6 +789,80 @@ describe('LobeOpenAICompatibleFactory', () => {
       }
     });

+    it('should detect ExceededContextWindow from error message text', async () => {
+      const apiError = new OpenAI.APIError(
+        400,
+        {
+          error: {
+            message:
+              "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+          },
+          status: 400,
+        },
+        'Error message',
+        {},
+      );
+
+      vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+      try {
+        await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+      } catch (e) {
+        expect(e).toEqual({
+          endpoint: defaultBaseURL,
+          error: {
+            error: {
+              message:
+                "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+            },
+            status: 400,
+          },
+          errorType: AgentRuntimeErrorType.ExceededContextWindow,
+          provider,
+        });
+      }
+    });
+
+    it('should detect QuotaLimitReached from error message text', async () => {
+      const apiError = new OpenAI.APIError(
+        429,
+        {
+          error: {
+            message: 'Resource has been exhausted (e.g. check quota).',
+          },
+          status: 429,
+        },
+        'Error message',
+        {},
+      );
+
+      vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+      try {
+        await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+      } catch (e) {
+        expect(e).toEqual({
+          endpoint: defaultBaseURL,
+          error: {
+            error: {
+              message: 'Resource has been exhausted (e.g. check quota).',
+            },
+            status: 429,
+          },
+          errorType: AgentRuntimeErrorType.QuotaLimitReached,
+          provider,
+        });
+      }
+    });
+
     it('should return AgentRuntimeError for non-OpenAI errors', async () => {
       // Arrange
       const genericError = new Error('Generic Error');
@@ -38,6 +38,8 @@ import { desensitizeUrl } from '../../utils/desensitizeUrl';
 import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
 import { getModelPricing } from '../../utils/getModelPricing';
 import { handleOpenAIError } from '../../utils/handleOpenAIError';
+import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError';
+import { isQuotaLimitError } from '../../utils/isQuotaLimitError';
 import { postProcessModelList } from '../../utils/postProcessModelList';
 import { StreamingResponse } from '../../utils/response';
 import type { LobeRuntimeAI } from '../BaseAI';
@@ -900,6 +902,27 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
         }
       }

+      const errorMsg = errorResult.error?.message || errorResult.message;
+      if (isExceededContextWindowError(errorMsg)) {
+        log('context length exceeded detected from message');
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.ExceededContextWindow,
+          provider: this.id,
+        });
+      }
+
+      if (isQuotaLimitError(errorMsg)) {
+        log('quota limit reached detected from message');
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.QuotaLimitReached,
+          provider: this.id,
+        });
+      }
+
       log('returning generic error');
       return AgentRuntimeError.chat({
         endpoint: desensitizedEndpoint,
@@ -320,6 +320,31 @@ describe('googleErrorParser', () => {
     );
   });

+  it('should detect exceeded context window from message text', () => {
+    const input =
+      'The input token count exceeds the maximum number of tokens allowed for this model';
+    const result = parseGoogleErrorMessage(input);
+
+    expect(result.errorType).toBe(AgentRuntimeErrorType.ExceededContextWindow);
+    expect(result.error.message).toBe(input);
+  });
+
+  it('should detect quota limit from "resource has been exhausted" message', () => {
+    const input = 'Resource has been exhausted (e.g. check quota).';
+    const result = parseGoogleErrorMessage(input);
+
+    expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached);
+    expect(result.error.message).toBe(input);
+  });
+
+  it('should detect quota limit from "too many requests" message', () => {
+    const input = 'Too many requests, please try again later';
+    const result = parseGoogleErrorMessage(input);
+
+    expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached);
+    expect(result.error.message).toBe(input);
+  });
+
   it('should return default error for unparseable messages', () => {
     const input = 'Some random error message that cannot be parsed';
     const result = parseGoogleErrorMessage(input);
@@ -1,5 +1,7 @@
 import type { ILobeAgentRuntimeErrorType } from '../types/error';
 import { AgentRuntimeErrorType } from '../types/error';
+import { isExceededContextWindowError } from './isExceededContextWindowError';
+import { isQuotaLimitError } from './isQuotaLimitError';

 export interface ParsedError {
   error: any;
@@ -110,6 +112,14 @@ export function parseGoogleErrorMessage(message: string): ParsedError {
     return { error: { message }, errorType: AgentRuntimeErrorType.ProviderNoImageGenerated };
   }

+  if (isExceededContextWindowError(message)) {
+    return { error: { message }, errorType: AgentRuntimeErrorType.ExceededContextWindow };
+  }
+
+  if (isQuotaLimitError(message)) {
+    return { error: { message }, errorType: AgentRuntimeErrorType.QuotaLimitReached };
+  }
+
   // Unified error type determination function
   const getErrorType = (code: number | null, message: string): ILobeAgentRuntimeErrorType => {
     if (code === 400 && message.includes('API key not valid')) {
packages/model-runtime/src/utils/isExceededContextWindowError.test.ts (new file, 81 lines)
@@ -0,0 +1,81 @@
+import { describe, expect, it } from 'vitest';
+
+import { isExceededContextWindowError } from './isExceededContextWindowError';
+
+describe('isExceededContextWindowError', () => {
+  it('should return false for undefined/empty input', () => {
+    expect(isExceededContextWindowError(undefined)).toBe(false);
+    expect(isExceededContextWindowError('')).toBe(false);
+  });
+
+  it('should detect OpenAI/DeepSeek "maximum context length" errors', () => {
+    expect(
+      isExceededContextWindowError(
+        "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+      ),
+    ).toBe(true);
+  });
+
+  it('should detect OpenAI "context length exceeded" errors', () => {
+    expect(isExceededContextWindowError('context length exceeded')).toBe(true);
+  });
+
+  it('should detect OpenAI "context_length_exceeded" code in message', () => {
+    expect(isExceededContextWindowError('Error code: context_length_exceeded')).toBe(true);
+  });
+
+  it('should detect MiniMax "context window exceeds" errors', () => {
+    expect(
+      isExceededContextWindowError('invalid params, context window exceeds limit (2013)'),
+    ).toBe(true);
+  });
+
+  it('should detect Aihubmix "exceeds the context window" errors', () => {
+    expect(
+      isExceededContextWindowError('This request exceeds the context window of this model'),
+    ).toBe(true);
+  });
+
+  it('should detect Anthropic "prompt is too long" errors', () => {
+    expect(isExceededContextWindowError('prompt is too long: 231426 tokens > 200000 maximum')).toBe(
+      true,
+    );
+  });
+
+  it('should detect Anthropic "input is too long" errors', () => {
+    expect(isExceededContextWindowError('input is too long for this model')).toBe(true);
+  });
+
+  it('should detect Bedrock "too many input tokens" errors', () => {
+    expect(isExceededContextWindowError('too many input tokens')).toBe(true);
+  });
+
+  it('should detect Google "exceeds the maximum number of tokens" errors', () => {
+    expect(
+      isExceededContextWindowError(
+        'The input token count exceeds the maximum number of tokens allowed',
+      ),
+    ).toBe(true);
+  });
+
+  it('should detect "maximum allowed number of input tokens" errors', () => {
+    expect(isExceededContextWindowError('maximum allowed number of input tokens is 128000')).toBe(
+      true,
+    );
+  });
+
+  it('should detect "request too large for model" errors', () => {
+    expect(isExceededContextWindowError('request too large for model')).toBe(true);
+  });
+
+  it('should be case-insensitive', () => {
+    expect(isExceededContextWindowError('MAXIMUM CONTEXT LENGTH exceeded')).toBe(true);
+    expect(isExceededContextWindowError('Prompt Is Too Long')).toBe(true);
+  });
+
+  it('should return false for unrelated error messages', () => {
+    expect(isExceededContextWindowError('Invalid API key')).toBe(false);
+    expect(isExceededContextWindowError('Rate limit exceeded')).toBe(false);
+    expect(isExceededContextWindowError('Internal server error')).toBe(false);
+  });
+});
packages/model-runtime/src/utils/isExceededContextWindowError.ts (new file, 19 lines)
@@ -0,0 +1,19 @@
+const CONTEXT_WINDOW_PATTERNS = [
+  'maximum context length', // OpenAI/DeepSeek
+  'context length exceeded', // OpenAI
+  'context_length_exceeded', // OpenAI (code in message)
+  'context window exceeds', // MiniMax non-streaming
+  'exceeds the context window', // Aihubmix / generic
+  'prompt is too long', // Anthropic
+  'input is too long', // Anthropic
+  'too many input tokens', // Bedrock
+  'exceeds the maximum number of tokens', // Google
+  'maximum allowed number of input tokens',
+  'request too large for model',
+];
+
+export const isExceededContextWindowError = (message?: string): boolean => {
+  if (!message) return false;
+  const lower = message.toLowerCase();
+  return CONTEXT_WINDOW_PATTERNS.some((p) => lower.includes(p));
+};
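A quick usage sketch of the matcher above, using sample provider messages taken from the tests (representative, not exhaustive):

import { isExceededContextWindowError } from './isExceededContextWindowError';

isExceededContextWindowError("This model's maximum context length is 131072 tokens."); // true — OpenAI/DeepSeek
isExceededContextWindowError('prompt is too long: 231426 tokens > 200000 maximum'); // true — Anthropic
isExceededContextWindowError('Rate limit exceeded'); // false — a quota error, not a context error
isExceededContextWindowError(undefined); // false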
packages/model-runtime/src/utils/isQuotaLimitError.test.ts (new file, 46 lines)
@@ -0,0 +1,46 @@
+import { describe, expect, it } from 'vitest';
+
+import { isQuotaLimitError } from './isQuotaLimitError';
+
+describe('isQuotaLimitError', () => {
+  it('should return false for undefined/empty input', () => {
+    expect(isQuotaLimitError(undefined)).toBe(false);
+    expect(isQuotaLimitError('')).toBe(false);
+  });
+
+  it('should detect Google "resource exhausted" errors', () => {
+    expect(isQuotaLimitError('Resource exhausted')).toBe(true);
+  });
+
+  it('should detect Google "resource has been exhausted" errors', () => {
+    expect(isQuotaLimitError('Resource has been exhausted (e.g. check quota).')).toBe(true);
+  });
+
+  it('should detect OpenAI "rate limit reached" errors', () => {
+    expect(isQuotaLimitError('Rate limit reached for model gpt-4 in organization')).toBe(true);
+  });
+
+  it('should detect OpenAI "rate_limit_exceeded" code in message', () => {
+    expect(isQuotaLimitError('Error code: rate_limit_exceeded')).toBe(true);
+  });
+
+  it('should detect "quota exceeded" errors', () => {
+    expect(isQuotaLimitError('Quota exceeded for this API key')).toBe(true);
+  });
+
+  it('should detect "too many requests" errors', () => {
+    expect(isQuotaLimitError('Too many requests, please slow down')).toBe(true);
+  });
+
+  it('should be case-insensitive', () => {
+    expect(isQuotaLimitError('RESOURCE EXHAUSTED')).toBe(true);
+    expect(isQuotaLimitError('Rate Limit Reached')).toBe(true);
+    expect(isQuotaLimitError('TOO MANY REQUESTS')).toBe(true);
+  });
+
+  it('should return false for unrelated error messages', () => {
+    expect(isQuotaLimitError('Invalid API key')).toBe(false);
+    expect(isQuotaLimitError('Context length exceeded')).toBe(false);
+    expect(isQuotaLimitError('Internal server error')).toBe(false);
+  });
+});
packages/model-runtime/src/utils/isQuotaLimitError.ts (new file, 14 lines)
@@ -0,0 +1,14 @@
+const QUOTA_LIMIT_PATTERNS = [
+  'resource exhausted', // Google / VertexAI
+  'resource has been exhausted', // Google
+  'rate limit reached', // OpenAI
+  'rate_limit_exceeded', // OpenAI (code in message)
+  'quota exceeded', // generic
+  'too many requests', // generic
+];
+
+export const isQuotaLimitError = (message?: string): boolean => {
+  if (!message) return false;
+  const lower = message.toLowerCase();
+  return QUOTA_LIMIT_PATTERNS.some((p) => lower.includes(p));
+};
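And the same kind of sketch for the quota matcher, again with sample messages from the tests:

import { isQuotaLimitError } from './isQuotaLimitError';

isQuotaLimitError('Resource has been exhausted (e.g. check quota).'); // true — Google
isQuotaLimitError('Error code: rate_limit_exceeded'); // true — OpenAI
isQuotaLimitError('Context length exceeded'); // false — covered by isExceededContextWindowError instead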
@@ -55,10 +55,6 @@ const getErrorAlertConfig = (
     type: 'secondary',
   };

-  /* ↓ cloud slot ↓ */
-
-  /* ↑ cloud slot ↑ */
-
   switch (errorType) {
     case ChatErrorType.SystemTimeNotMatchError:
     case AgentRuntimeErrorType.PermissionDenied:
@@ -106,6 +106,10 @@ export default {
     'Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.',
   'response.ExceededContextWindow':
     'The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.',
+  'response.ExceededContextWindowCloud':
+    'The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.',
+  'response.QuotaLimitReachedCloud':
+    'The model service is currently under heavy load. Please try again later.',
   'response.FreePlanLimit':
     'You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.',
   'response.GoogleAIBlockReason.BLOCKLIST':