mirror of https://github.com/lobehub/lobehub.git, synced 2026-03-27 13:29:15 +07:00
🐛 fix: detect exceeded context window errors from message text (#12788)
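In essence: two small substring matchers are added under packages/model-runtime/src/utils and wired into the Anthropic-compatible, OpenAI-compatible, and Google error handlers, so a provider error can be reclassified from its message text before falling back to the generic provider error. A rough sketch of that shared flow (illustrative only; `classify` is a hypothetical name for this sketch, while the helper and error-type names are the real ones from the diff below):

// Sketch of the shared detection flow. The real handlers also attach the
// desensitized endpoint and the provider id; see the runtime hunks below.
const classify = (errorMsg?: string) => {
  if (isExceededContextWindowError(errorMsg)) return AgentRuntimeErrorType.ExceededContextWindow;
  if (isQuotaLimitError(errorMsg)) return AgentRuntimeErrorType.QuotaLimitReached;
  return undefined; // fall through to the provider's generic error type
};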
@@ -67,6 +67,7 @@
   "response.ConnectionCheckFailed": "The request returned empty. Please check if the API proxy address does not end with `/v1`.",
   "response.CreateMessageError": "Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.",
   "response.ExceededContextWindow": "The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.",
+  "response.ExceededContextWindowCloud": "The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.",
   "response.FreePlanLimit": "You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.",
   "response.GoogleAIBlockReason.BLOCKLIST": "Your content contains prohibited terms. Please review and modify your input, then try again.",
   "response.GoogleAIBlockReason.IMAGE_SAFETY": "The generated image was blocked for safety reasons. Please try modifying your image request.",
@@ -106,6 +107,7 @@
   "response.PluginSettingsInvalid": "This skill needs to be correctly configured before it can be used. Please check if your configuration is correct",
   "response.ProviderBizError": "Error requesting {{provider}} service, please troubleshoot or retry based on the following information",
   "response.QuotaLimitReached": "Sorry, the token usage or request count has reached the quota limit for this key. Please increase the key's quota or try again later.",
+  "response.QuotaLimitReachedCloud": "The model service is currently under heavy load. Please try again later.",
   "response.ServerAgentRuntimeError": "Sorry, the Agent service is currently unavailable. Please try again later or contact us via email for support.",
   "response.StreamChunkError": "Error parsing the message chunk of the streaming request. Please check if the current API interface complies with the standard specifications, or contact your API provider for assistance.",
   "response.SubscriptionKeyMismatch": "We apologize for the inconvenience. Due to a temporary system malfunction, your current subscription usage is inactive. Please click the button below to restore your subscription, or contact us via email for support.",
@@ -67,6 +67,7 @@
   "response.ConnectionCheckFailed": "测试返回为空。请确认 API 代理地址末尾未包含 `/v1`",
   "response.CreateMessageError": "消息未能发送。建议先复制内容再重试;刷新页面后该消息不会保留",
   "response.ExceededContextWindow": "上下文长度超出模型限制。请减少内容量后重试",
+  "response.ExceededContextWindowCloud": "当前对话内容过长,无法继续处理。请编辑最后一条消息减少输入内容或者删除一些消息重试。",
   "response.FreePlanLimit": "当前计划不支持该功能。请升级到付费计划后继续",
   "response.GoogleAIBlockReason.BLOCKLIST": "内容包含被禁止的词汇。请修改后重试",
   "response.GoogleAIBlockReason.IMAGE_SAFETY": "图像生成请求因安全策略被阻止。请调整描述后重试",
@@ -106,6 +107,7 @@
   "response.PluginSettingsInvalid": "该技能需要完成配置后才能使用,请检查技能配置",
   "response.ProviderBizError": "模型服务商返回错误。请根据以下信息排查,或稍后重试",
   "response.QuotaLimitReached": "Token 用量或请求次数已达配额上限。请提升配额或稍后再试",
+  "response.QuotaLimitReachedCloud": "当前模型服务负载较高,请稍后重试。",
   "response.ServerAgentRuntimeError": "助理运行服务暂不可用。请稍后再试,或邮件联系我们",
   "response.StreamChunkError": "流式响应解析失败。请检查接口是否符合规范,或联系模型服务商",
   "response.SubscriptionKeyMismatch": "订阅状态同步异常。你可以点击下方按钮恢复订阅,或邮件联系我们",
@@ -19,6 +19,8 @@ import { AgentRuntimeError } from '../../utils/createError';
 import { debugStream } from '../../utils/debugStream';
 import { desensitizeUrl } from '../../utils/desensitizeUrl';
 import { getModelPricing } from '../../utils/getModelPricing';
+import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError';
+import { isQuotaLimitError } from '../../utils/isQuotaLimitError';
 import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
 import { StreamingResponse } from '../../utils/response';
 import type { LobeRuntimeAI } from '../BaseAI';
@@ -283,6 +285,23 @@ export const handleDefaultAnthropicError = <T extends Record<string, any> = any>

   const { errorResult } = handleAnthropicError(error);

+  const errorMsg = errorResult.message || errorResult.error?.message;
+  if (isExceededContextWindowError(errorMsg)) {
+    return {
+      endpoint: desensitizedEndpoint,
+      error: errorResult,
+      errorType: AgentRuntimeErrorType.ExceededContextWindow,
+    };
+  }
+
+  if (isQuotaLimitError(errorMsg)) {
+    return {
+      endpoint: desensitizedEndpoint,
+      error: errorResult,
+      errorType: AgentRuntimeErrorType.QuotaLimitReached,
+    };
+  }
+
   return {
     endpoint: desensitizedEndpoint,
     error: errorResult,
@@ -660,6 +679,25 @@ export const createAnthropicCompatibleRuntime = <T extends Record<string, any> =
       return { headers: error?.headers, stack: error?.stack, status: error?.status };
     })();

+    const errorMsg = errorResult.message || errorResult.error?.message;
+    if (isExceededContextWindowError(errorMsg)) {
+      return AgentRuntimeError.chat({
+        endpoint: desensitizedEndpoint,
+        error: errorResult,
+        errorType: AgentRuntimeErrorType.ExceededContextWindow,
+        provider: this.id,
+      });
+    }
+
+    if (isQuotaLimitError(errorMsg)) {
+      return AgentRuntimeError.chat({
+        endpoint: desensitizedEndpoint,
+        error: errorResult,
+        errorType: AgentRuntimeErrorType.QuotaLimitReached,
+        provider: this.id,
+      });
+    }
+
     return AgentRuntimeError.chat({
       endpoint: desensitizedEndpoint,
       error: errorResult,
@@ -351,7 +351,7 @@ describe('LobeOpenAICompatibleFactory', () => {
         'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10}\n\n',
         'id: output_speed\n',
         'event: speed\n',
-        expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time
+        expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time
         'id: a\n',
         'event: stop\n',
         'data: "stop"\n\n',
@@ -427,7 +427,7 @@ describe('LobeOpenAICompatibleFactory', () => {
         'data: {"inputTextTokens":5,"outputTextTokens":5,"totalInputTokens":5,"totalOutputTokens":5,"totalTokens":10,"cost":0.000005}\n\n',
         'id: output_speed\n',
         'event: speed\n',
-        expect.stringMatching(/^data: {.*"tps":.*,"ttft":.*}\n\n$/), // tps ttft should be calculated with elapsed time
+        expect.stringMatching(/^data: \{.*"tps":.*,"ttft":.*\}\n\n$/), // tps ttft should be calculated with elapsed time
         'id: a\n',
         'event: stop\n',
         'data: "stop"\n\n',
@@ -789,6 +789,80 @@ describe('LobeOpenAICompatibleFactory', () => {
       }
     });

+    it('should detect ExceededContextWindow from error message text', async () => {
+      const apiError = new OpenAI.APIError(
+        400,
+        {
+          error: {
+            message:
+              "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+          },
+          status: 400,
+        },
+        'Error message',
+        {},
+      );
+
+      vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+      try {
+        await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+      } catch (e) {
+        expect(e).toEqual({
+          endpoint: defaultBaseURL,
+          error: {
+            error: {
+              message:
+                "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+            },
+            status: 400,
+          },
+          errorType: AgentRuntimeErrorType.ExceededContextWindow,
+          provider,
+        });
+      }
+    });
+
+    it('should detect QuotaLimitReached from error message text', async () => {
+      const apiError = new OpenAI.APIError(
+        429,
+        {
+          error: {
+            message: 'Resource has been exhausted (e.g. check quota).',
+          },
+          status: 429,
+        },
+        'Error message',
+        {},
+      );
+
+      vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+      try {
+        await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+      } catch (e) {
+        expect(e).toEqual({
+          endpoint: defaultBaseURL,
+          error: {
+            error: {
+              message: 'Resource has been exhausted (e.g. check quota).',
+            },
+            status: 429,
+          },
+          errorType: AgentRuntimeErrorType.QuotaLimitReached,
+          provider,
+        });
+      }
+    });
+
     it('should return AgentRuntimeError for non-OpenAI errors', async () => {
       // Arrange
       const genericError = new Error('Generic Error');
@@ -38,6 +38,8 @@ import { desensitizeUrl } from '../../utils/desensitizeUrl';
 import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
 import { getModelPricing } from '../../utils/getModelPricing';
 import { handleOpenAIError } from '../../utils/handleOpenAIError';
+import { isExceededContextWindowError } from '../../utils/isExceededContextWindowError';
+import { isQuotaLimitError } from '../../utils/isQuotaLimitError';
 import { postProcessModelList } from '../../utils/postProcessModelList';
 import { StreamingResponse } from '../../utils/response';
 import type { LobeRuntimeAI } from '../BaseAI';
@@ -900,6 +902,27 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
         }
       }

+      const errorMsg = errorResult.error?.message || errorResult.message;
+      if (isExceededContextWindowError(errorMsg)) {
+        log('context length exceeded detected from message');
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.ExceededContextWindow,
+          provider: this.id,
+        });
+      }
+
+      if (isQuotaLimitError(errorMsg)) {
+        log('quota limit reached detected from message');
+        return AgentRuntimeError.chat({
+          endpoint: desensitizedEndpoint,
+          error: errorResult,
+          errorType: AgentRuntimeErrorType.QuotaLimitReached,
+          provider: this.id,
+        });
+      }
+
       log('returning generic error');
       return AgentRuntimeError.chat({
         endpoint: desensitizedEndpoint,
@@ -320,6 +320,31 @@ describe('googleErrorParser', () => {
     );
   });

+  it('should detect exceeded context window from message text', () => {
+    const input =
+      'The input token count exceeds the maximum number of tokens allowed for this model';
+    const result = parseGoogleErrorMessage(input);
+
+    expect(result.errorType).toBe(AgentRuntimeErrorType.ExceededContextWindow);
+    expect(result.error.message).toBe(input);
+  });
+
+  it('should detect quota limit from "resource has been exhausted" message', () => {
+    const input = 'Resource has been exhausted (e.g. check quota).';
+    const result = parseGoogleErrorMessage(input);
+
+    expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached);
+    expect(result.error.message).toBe(input);
+  });
+
+  it('should detect quota limit from "too many requests" message', () => {
+    const input = 'Too many requests, please try again later';
+    const result = parseGoogleErrorMessage(input);
+
+    expect(result.errorType).toBe(AgentRuntimeErrorType.QuotaLimitReached);
+    expect(result.error.message).toBe(input);
+  });
+
   it('should return default error for unparseable messages', () => {
     const input = 'Some random error message that cannot be parsed';
     const result = parseGoogleErrorMessage(input);
@@ -1,5 +1,7 @@
 import type { ILobeAgentRuntimeErrorType } from '../types/error';
 import { AgentRuntimeErrorType } from '../types/error';
+import { isExceededContextWindowError } from './isExceededContextWindowError';
+import { isQuotaLimitError } from './isQuotaLimitError';

 export interface ParsedError {
   error: any;
@@ -110,6 +112,14 @@ export function parseGoogleErrorMessage(message: string): ParsedError {
     return { error: { message }, errorType: AgentRuntimeErrorType.ProviderNoImageGenerated };
   }

+  if (isExceededContextWindowError(message)) {
+    return { error: { message }, errorType: AgentRuntimeErrorType.ExceededContextWindow };
+  }
+
+  if (isQuotaLimitError(message)) {
+    return { error: { message }, errorType: AgentRuntimeErrorType.QuotaLimitReached };
+  }
+
   // Unified error type determination function
   const getErrorType = (code: number | null, message: string): ILobeAgentRuntimeErrorType => {
     if (code === 400 && message.includes('API key not valid')) {
packages/model-runtime/src/utils/isExceededContextWindowError.test.ts (new file, 81 lines)
@@ -0,0 +1,81 @@
+import { describe, expect, it } from 'vitest';
+
+import { isExceededContextWindowError } from './isExceededContextWindowError';
+
+describe('isExceededContextWindowError', () => {
+  it('should return false for undefined/empty input', () => {
+    expect(isExceededContextWindowError(undefined)).toBe(false);
+    expect(isExceededContextWindowError('')).toBe(false);
+  });
+
+  it('should detect OpenAI/DeepSeek "maximum context length" errors', () => {
+    expect(
+      isExceededContextWindowError(
+        "This model's maximum context length is 131072 tokens. However, your messages resulted in 140000 tokens.",
+      ),
+    ).toBe(true);
+  });
+
+  it('should detect OpenAI "context length exceeded" errors', () => {
+    expect(isExceededContextWindowError('context length exceeded')).toBe(true);
+  });
+
+  it('should detect OpenAI "context_length_exceeded" code in message', () => {
+    expect(isExceededContextWindowError('Error code: context_length_exceeded')).toBe(true);
+  });
+
+  it('should detect MiniMax "context window exceeds" errors', () => {
+    expect(
+      isExceededContextWindowError('invalid params, context window exceeds limit (2013)'),
+    ).toBe(true);
+  });
+
+  it('should detect Aihubmix "exceeds the context window" errors', () => {
+    expect(
+      isExceededContextWindowError('This request exceeds the context window of this model'),
+    ).toBe(true);
+  });
+
+  it('should detect Anthropic "prompt is too long" errors', () => {
+    expect(isExceededContextWindowError('prompt is too long: 231426 tokens > 200000 maximum')).toBe(
+      true,
+    );
+  });
+
+  it('should detect Anthropic "input is too long" errors', () => {
+    expect(isExceededContextWindowError('input is too long for this model')).toBe(true);
+  });
+
+  it('should detect Bedrock "too many input tokens" errors', () => {
+    expect(isExceededContextWindowError('too many input tokens')).toBe(true);
+  });
+
+  it('should detect Google "exceeds the maximum number of tokens" errors', () => {
+    expect(
+      isExceededContextWindowError(
+        'The input token count exceeds the maximum number of tokens allowed',
+      ),
+    ).toBe(true);
+  });
+
+  it('should detect "maximum allowed number of input tokens" errors', () => {
+    expect(isExceededContextWindowError('maximum allowed number of input tokens is 128000')).toBe(
+      true,
+    );
+  });
+
+  it('should detect "request too large for model" errors', () => {
+    expect(isExceededContextWindowError('request too large for model')).toBe(true);
+  });
+
+  it('should be case-insensitive', () => {
+    expect(isExceededContextWindowError('MAXIMUM CONTEXT LENGTH exceeded')).toBe(true);
+    expect(isExceededContextWindowError('Prompt Is Too Long')).toBe(true);
+  });
+
+  it('should return false for unrelated error messages', () => {
+    expect(isExceededContextWindowError('Invalid API key')).toBe(false);
+    expect(isExceededContextWindowError('Rate limit exceeded')).toBe(false);
+    expect(isExceededContextWindowError('Internal server error')).toBe(false);
+  });
+});
packages/model-runtime/src/utils/isExceededContextWindowError.ts (new file, 19 lines)
@@ -0,0 +1,19 @@
+const CONTEXT_WINDOW_PATTERNS = [
+  'maximum context length', // OpenAI/DeepSeek
+  'context length exceeded', // OpenAI
+  'context_length_exceeded', // OpenAI (code in message)
+  'context window exceeds', // MiniMax non-streaming
+  'exceeds the context window', // Aihubmix / generic
+  'prompt is too long', // Anthropic
+  'input is too long', // Anthropic
+  'too many input tokens', // Bedrock
+  'exceeds the maximum number of tokens', // Google
+  'maximum allowed number of input tokens',
+  'request too large for model',
+];
+
+export const isExceededContextWindowError = (message?: string): boolean => {
+  if (!message) return false;
+  const lower = message.toLowerCase();
+  return CONTEXT_WINDOW_PATTERNS.some((p) => lower.includes(p));
+};
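A quick usage sketch of the matcher above, using sample provider messages taken from the tests (representative, not exhaustive):

import { isExceededContextWindowError } from './isExceededContextWindowError';

isExceededContextWindowError("This model's maximum context length is 131072 tokens."); // true — OpenAI/DeepSeek
isExceededContextWindowError('prompt is too long: 231426 tokens > 200000 maximum'); // true — Anthropic
isExceededContextWindowError('Rate limit exceeded'); // false — a quota error, not a context error
isExceededContextWindowError(undefined); // false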
packages/model-runtime/src/utils/isQuotaLimitError.test.ts (new file, 46 lines)
@@ -0,0 +1,46 @@
+import { describe, expect, it } from 'vitest';
+
+import { isQuotaLimitError } from './isQuotaLimitError';
+
+describe('isQuotaLimitError', () => {
+  it('should return false for undefined/empty input', () => {
+    expect(isQuotaLimitError(undefined)).toBe(false);
+    expect(isQuotaLimitError('')).toBe(false);
+  });
+
+  it('should detect Google "resource exhausted" errors', () => {
+    expect(isQuotaLimitError('Resource exhausted')).toBe(true);
+  });
+
+  it('should detect Google "resource has been exhausted" errors', () => {
+    expect(isQuotaLimitError('Resource has been exhausted (e.g. check quota).')).toBe(true);
+  });
+
+  it('should detect OpenAI "rate limit reached" errors', () => {
+    expect(isQuotaLimitError('Rate limit reached for model gpt-4 in organization')).toBe(true);
+  });
+
+  it('should detect OpenAI "rate_limit_exceeded" code in message', () => {
+    expect(isQuotaLimitError('Error code: rate_limit_exceeded')).toBe(true);
+  });
+
+  it('should detect "quota exceeded" errors', () => {
+    expect(isQuotaLimitError('Quota exceeded for this API key')).toBe(true);
+  });
+
+  it('should detect "too many requests" errors', () => {
+    expect(isQuotaLimitError('Too many requests, please slow down')).toBe(true);
+  });
+
+  it('should be case-insensitive', () => {
+    expect(isQuotaLimitError('RESOURCE EXHAUSTED')).toBe(true);
+    expect(isQuotaLimitError('Rate Limit Reached')).toBe(true);
+    expect(isQuotaLimitError('TOO MANY REQUESTS')).toBe(true);
+  });
+
+  it('should return false for unrelated error messages', () => {
+    expect(isQuotaLimitError('Invalid API key')).toBe(false);
+    expect(isQuotaLimitError('Context length exceeded')).toBe(false);
+    expect(isQuotaLimitError('Internal server error')).toBe(false);
+  });
+});
packages/model-runtime/src/utils/isQuotaLimitError.ts (new file, 14 lines)
@@ -0,0 +1,14 @@
+const QUOTA_LIMIT_PATTERNS = [
+  'resource exhausted', // Google / VertexAI
+  'resource has been exhausted', // Google
+  'rate limit reached', // OpenAI
+  'rate_limit_exceeded', // OpenAI (code in message)
+  'quota exceeded', // generic
+  'too many requests', // generic
+];
+
+export const isQuotaLimitError = (message?: string): boolean => {
+  if (!message) return false;
+  const lower = message.toLowerCase();
+  return QUOTA_LIMIT_PATTERNS.some((p) => lower.includes(p));
+};
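And the same kind of sketch for the quota matcher, again with sample messages from the tests:

import { isQuotaLimitError } from './isQuotaLimitError';

isQuotaLimitError('Resource has been exhausted (e.g. check quota).'); // true — Google
isQuotaLimitError('Error code: rate_limit_exceeded'); // true — OpenAI
isQuotaLimitError('Context length exceeded'); // false — covered by isExceededContextWindowError instead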
@@ -55,10 +55,6 @@ const getErrorAlertConfig = (
     type: 'secondary',
   };

-  /* ↓ cloud slot ↓ */
-
-  /* ↑ cloud slot ↑ */
-
   switch (errorType) {
     case ChatErrorType.SystemTimeNotMatchError:
     case AgentRuntimeErrorType.PermissionDenied:
@@ -106,6 +106,10 @@ export default {
     'Sorry, the message could not be sent successfully. Please copy the content and try sending it again. This message will not be retained after refreshing the page.',
   'response.ExceededContextWindow':
     'The current request content exceeds the length that the model can handle. Please reduce the amount of content and try again.',
+  'response.ExceededContextWindowCloud':
+    'The conversation is too long to process. Please edit your last message to reduce input or delete some messages and try again.',
+  'response.QuotaLimitReachedCloud':
+    'The model service is currently under heavy load. Please try again later.',
   'response.FreePlanLimit':
     'You are currently a free user and cannot use this feature. Please upgrade to a paid plan to continue using it.',
   'response.GoogleAIBlockReason.BLOCKLIST':