feat: bedrock claude model thinking support (#10422)

This commit is contained in:
YuTengjing
2025-11-26 00:42:36 +08:00
committed by GitHub
parent 5bab1a4bcf
commit 8b41638755
13 changed files with 309 additions and 211 deletions

View File

@@ -6,6 +6,7 @@ export * from './discover';
export * from './layoutTokens';
export * from './message';
export * from './meta';
export * from './models';
export * from './plugin';
export * from './session';
export * from './settings';

View File

@@ -43,51 +43,63 @@ export const responsesAPIModels = new Set([
]);
/**
 * Model ids known to support context caching.
 *
 * NOTE(review): this diff also introduces `contextCachingModelPatterns` for
 * regex-based matching — confirm whether this explicit Set is still referenced
 * after the change or is fully superseded by the patterns.
 */
export const contextCachingModels = new Set([
'claude-opus-4-5-20251101',
'claude-haiku-4-5-20251001',
'claude-sonnet-4-5-latest',
'claude-sonnet-4-5-20250929',
'anthropic/claude-sonnet-4.5',
'claude-opus-4-latest',
'claude-opus-4-20250514',
'claude-sonnet-4-latest',
'claude-sonnet-4-20250514',
'claude-3-7-sonnet-latest',
'claude-3-7-sonnet-20250219',
'claude-3-5-sonnet-latest',
'claude-3-5-sonnet-20241022',
'claude-3-5-sonnet-20240620',
'claude-3-5-haiku-latest',
'claude-3-5-haiku-20241022',
// Bedrock model IDs (with and without a region/global routing prefix)
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
'anthropic.claude-sonnet-4-5-20250929-v1:0',
'us.anthropic.claude-haiku-4-5-20251001-v1:0',
'anthropic.claude-haiku-4-5-20251001-v1:0',
'global.anthropic.claude-opus-4-5-20251101-v1:0',
'anthropic.claude-opus-4-5-20251101-v1:0',
]);
/**
 * Regex patterns matching models that support context caching (Claude 3.5+).
 * Covers Anthropic API ids, OpenRouter slugs, and AWS Bedrock ids with an
 * optional region prefix.
 */
export const contextCachingModelPatterns: RegExp[] = [
  // Anthropic API — Claude 4.5 family
  /^claude-(opus|sonnet|haiku)-4-5-/,
  // Anthropic API — Claude 4 family
  /^claude-(opus|sonnet)-4-/,
  // Anthropic API — Claude 3.7 Sonnet
  /^claude-3-7-sonnet-/,
  // Anthropic API — Claude 3.5 family
  /^claude-3-5-(sonnet|haiku)-/,
  // OpenRouter slugs (3.5+), both `claude-<model>-<ver>` and `claude-<ver>-<model>` orders
  /^anthropic\/claude-(opus|sonnet|haiku)-(4\.5|4|3\.7|3\.5)/,
  /^anthropic\/claude-(4\.5|4|3\.7|3\.5)-(opus|sonnet|haiku)/,
  // AWS Bedrock ids: [region.]anthropic.claude-xxx
  /anthropic\.claude-(opus|sonnet|haiku)-(4-5|4|3-7|3-5)-/,
];

// NOTE(review): explicit id allowlist for thinking-with-tools models; this diff
// also adds `thinkingWithToolClaudeModelPatterns` — confirm remaining usages.
export const thinkingWithToolClaudeModels = new Set([
  'claude-opus-4-5-20251101',
  'claude-opus-4-latest',
  'claude-opus-4-20250514',
  'claude-sonnet-4-latest',
  'claude-sonnet-4-20250514',
  'claude-sonnet-4-5-latest',
  'claude-sonnet-4-5-20250929',
  'claude-haiku-4-5-20251001',
  'anthropic/claude-sonnet-4.5',
  'claude-3-7-sonnet-latest',
  'claude-3-7-sonnet-20250219',
  // Bedrock model IDs
  'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
  'anthropic.claude-sonnet-4-5-20250929-v1:0',
  'us.anthropic.claude-haiku-4-5-20251001-v1:0',
  'anthropic.claude-haiku-4-5-20251001-v1:0',
  'global.anthropic.claude-opus-4-5-20251101-v1:0',
  'anthropic.claude-opus-4-5-20251101-v1:0',
]);

/**
 * Whether the given model id supports context caching.
 */
export const isContextCachingModel = (model: string): boolean => {
  for (const pattern of contextCachingModelPatterns) {
    if (pattern.test(model)) return true;
  }
  return false;
};
/**
 * Regex patterns for Claude models that support extended thinking together
 * with tool use (Claude 3.7 and newer).
 */
export const thinkingWithToolClaudeModelPatterns: RegExp[] = [
  // Anthropic API — Claude 4.5 family
  /^claude-(opus|sonnet|haiku)-4-5-/,
  // Anthropic API — Claude 4 family
  /^claude-(opus|sonnet)-4-/,
  // Anthropic API — Claude 3.7 Sonnet
  /^claude-3-7-sonnet-/,
  // OpenRouter slugs (3.7+), both naming orders
  /^anthropic\/claude-(opus|sonnet|haiku)-(4\.5|4|3\.7)/,
  /^anthropic\/claude-(4\.5|4|3\.7)-(opus|sonnet|haiku)/,
  // AWS Bedrock ids: [region.]anthropic.claude-xxx
  /anthropic\.claude-(opus|sonnet|haiku)-(4-5|4|3-7)-/,
];

/**
 * Whether the given model id supports thinking combined with tool calls.
 */
export const isThinkingWithToolClaudeModel = (model: string): boolean => {
  for (const pattern of thinkingWithToolClaudeModelPatterns) {
    if (pattern.test(model)) return true;
  }
  return false;
};
/**
 * Regex patterns for Claude 4+ models where `temperature` and `top_p` must
 * not be sent together (the API rejects requests carrying both).
 */
export const temperatureTopPConflictModelPatterns: RegExp[] = [
  // Anthropic API — Claude 4 / 4.1 / 4.5
  /^claude-(opus|sonnet|haiku)-4/,
  // OpenRouter slugs, both naming orders
  /^anthropic\/claude-(opus|sonnet|haiku)-(4\.5|4\.1|4)/,
  /^anthropic\/claude-(4\.5|4\.1|4)-(opus|sonnet|haiku)/,
  // AWS Bedrock ids: [region.]anthropic.claude-xxx
  /anthropic\.claude-(opus|sonnet|haiku)-4/,
];

/**
 * Whether the model forbids setting `temperature` and `top_p` at once.
 */
export const hasTemperatureTopPConflict = (model: string): boolean => {
  for (const pattern of temperatureTopPConflictModelPatterns) {
    if (pattern.test(model)) return true;
  }
  return false;
};

View File

@@ -13,7 +13,7 @@ const bedrockChatModels: AIChatModelCard[] = [
'Claude Opus 4.5 是 Anthropic 的旗舰模型,结合了卓越的智能与可扩展性能,适合需要最高质量回应和推理能力的复杂任务。',
displayName: 'Claude Opus 4.5',
enabled: true,
id: 'us.anthropic.claude-opus-4-5-20251101-v1:0',
id: 'global.anthropic.claude-opus-4-5-20251101-v1:0',
maxOutput: 64_000,
pricing: {
units: [
@@ -23,6 +23,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2025-11-24',
settings: {
extendParams: ['disableContextCaching', 'enableReasoning', 'reasoningBudgetToken'],
},
type: 'chat',
},
{
@@ -45,6 +48,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2025-09-29',
settings: {
extendParams: ['disableContextCaching', 'enableReasoning', 'reasoningBudgetToken'],
},
type: 'chat',
},
{
@@ -68,6 +74,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2025-10-15',
settings: {
extendParams: ['disableContextCaching', 'enableReasoning', 'reasoningBudgetToken'],
},
type: 'chat',
},
/*
@@ -103,7 +112,7 @@ const bedrockChatModels: AIChatModelCard[] = [
'Claude 3.7 sonnet 是 Anthropic 最快的下一代模型。与 Claude 3 Haiku 相比Claude 3.7 Sonnet 在各项技能上都有所提升,并在许多智力基准测试中超越了上一代最大的模型 Claude 3 Opus。',
displayName: 'Claude 3.7 Sonnet',
id: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
maxOutput: 8192,
maxOutput: 64_000,
pricing: {
units: [
{ name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
@@ -111,6 +120,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2025-02-24',
settings: {
extendParams: ['disableContextCaching', 'enableReasoning', 'reasoningBudgetToken'],
},
type: 'chat',
},
{
@@ -131,6 +143,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2024-10-22',
settings: {
extendParams: ['disableContextCaching'],
},
type: 'chat',
},
{
@@ -151,6 +166,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2024-10-22',
settings: {
extendParams: ['disableContextCaching'],
},
type: 'chat',
},
{
@@ -171,6 +189,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2024-06-20',
settings: {
extendParams: ['disableContextCaching'],
},
type: 'chat',
},
{
@@ -191,6 +212,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2024-03-07',
settings: {
extendParams: ['disableContextCaching'],
},
type: 'chat',
},
{
@@ -228,6 +252,9 @@ const bedrockChatModels: AIChatModelCard[] = [
],
},
releasedAt: '2024-02-29',
settings: {
extendParams: ['disableContextCaching'],
},
type: 'chat',
},
{
@@ -264,6 +291,7 @@ const bedrockChatModels: AIChatModelCard[] = [
'一款快速、经济且仍然非常有能力的模型,可以处理包括日常对话、文本分析、总结和文档问答在内的一系列任务。',
displayName: 'Claude Instant',
id: 'anthropic.claude-instant-v1',
maxOutput: 4096,
pricing: {
units: [
{ name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' },

View File

@@ -5,14 +5,14 @@ export const systemToUserModels = new Set([
'o1-mini-2024-09-12',
]);
// TODO: 临时写法,后续要重构成 model card 展示配置
// TODO: temporary implementation, needs to be refactored into model card display configuration
export const disableStreamModels = new Set([
'o1',
'o1-2024-12-17',
'o1-pro',
'o1-pro-2025-03-19',
/*
官网显示不支持,但是实际试下来支持 Streaming暂时注释掉
Official documentation shows no support, but actual testing shows Streaming is supported, temporarily commented out
'o3-pro',
'o3-pro-2025-06-10',
*/
@@ -38,30 +38,68 @@ export const responsesAPIModels = new Set([
'gpt-5-codex',
'gpt-5-pro',
'gpt-5-pro-2025-10-06',
'gpt-5.1-codex',
'gpt-5.1-codex-mini',
]);
/**
 * Model ids known to support context caching.
 *
 * NOTE(review): legacy allowlist — this diff replaces Set lookups with
 * regex-based matching via `contextCachingModelPatterns`; confirm no callers
 * still reference this Set.
 */
export const contextCachingModels = new Set([
'claude-opus-4-latest',
'claude-opus-4-20250514',
'claude-sonnet-4-latest',
'claude-sonnet-4-20250514',
'claude-3-7-sonnet-latest',
'claude-3-7-sonnet-20250219',
'claude-3-5-sonnet-latest',
'claude-3-5-sonnet-20241022',
'claude-3-5-sonnet-20240620',
'claude-3-5-haiku-latest',
'claude-3-5-haiku-20241022',
]);
/**
 * Regex patterns matching models that support context caching (Claude 3.5+).
 * Covers Anthropic API ids, OpenRouter slugs, and AWS Bedrock ids with an
 * optional region prefix.
 */
export const contextCachingModelPatterns: RegExp[] = [
  // Anthropic API — Claude 4.5 family
  /^claude-(opus|sonnet|haiku)-4-5-/,
  // Anthropic API — Claude 4 family
  /^claude-(opus|sonnet)-4-/,
  // Anthropic API — Claude 3.7 Sonnet
  /^claude-3-7-sonnet-/,
  // Anthropic API — Claude 3.5 family
  /^claude-3-5-(sonnet|haiku)-/,
  // OpenRouter slugs (3.5+), both naming orders
  /^anthropic\/claude-(opus|sonnet|haiku)-(4\.5|4|3\.7|3\.5)/,
  /^anthropic\/claude-(4\.5|4|3\.7|3\.5)-(opus|sonnet|haiku)/,
  // AWS Bedrock ids: [region.]anthropic.claude-xxx
  /anthropic\.claude-(opus|sonnet|haiku)-(4-5|4|3-7|3-5)-/,
];

// NOTE(review): explicit id allowlist for thinking-with-tools models; likely
// superseded by `thinkingWithToolClaudeModelPatterns` — confirm remaining usages.
export const thinkingWithToolClaudeModels = new Set([
  'claude-opus-4-latest',
  'claude-opus-4-20250514',
  'claude-sonnet-4-latest',
  'claude-sonnet-4-20250514',
  'claude-3-7-sonnet-latest',
  'claude-3-7-sonnet-20250219',
]);

/**
 * Whether the given model id supports context caching.
 */
export const isContextCachingModel = (model: string): boolean => {
  for (const pattern of contextCachingModelPatterns) {
    if (pattern.test(model)) return true;
  }
  return false;
};
/**
 * Regex patterns for Claude models that support extended thinking together
 * with tool use (Claude 3.7 and newer).
 */
export const thinkingWithToolClaudeModelPatterns: RegExp[] = [
  // Anthropic API — Claude 4.5 family
  /^claude-(opus|sonnet|haiku)-4-5-/,
  // Anthropic API — Claude 4 family
  /^claude-(opus|sonnet)-4-/,
  // Anthropic API — Claude 3.7 Sonnet
  /^claude-3-7-sonnet-/,
  // OpenRouter slugs (3.7+), both naming orders
  /^anthropic\/claude-(opus|sonnet|haiku)-(4\.5|4|3\.7)/,
  /^anthropic\/claude-(4\.5|4|3\.7)-(opus|sonnet|haiku)/,
  // AWS Bedrock ids: [region.]anthropic.claude-xxx
  /anthropic\.claude-(opus|sonnet|haiku)-(4-5|4|3-7)-/,
];

/**
 * Whether the given model id supports thinking combined with tool calls.
 */
export const isThinkingWithToolClaudeModel = (model: string): boolean => {
  for (const pattern of thinkingWithToolClaudeModelPatterns) {
    if (pattern.test(model)) return true;
  }
  return false;
};
/**
 * Regex patterns for Claude 4+ models where `temperature` and `top_p` must
 * not be sent together (the API rejects requests carrying both).
 */
export const temperatureTopPConflictModelPatterns: RegExp[] = [
  // Anthropic API — Claude 4 / 4.1 / 4.5
  /^claude-(opus|sonnet|haiku)-4/,
  // OpenRouter slugs, both naming orders
  /^anthropic\/claude-(opus|sonnet|haiku)-(4\.5|4\.1|4)/,
  /^anthropic\/claude-(4\.5|4\.1|4)-(opus|sonnet|haiku)/,
  // AWS Bedrock ids: [region.]anthropic.claude-xxx
  /anthropic\.claude-(opus|sonnet|haiku)-4/,
];

/**
 * Whether the model forbids setting `temperature` and `top_p` at once.
 */
export const hasTemperatureTopPConflict = (model: string): boolean => {
  for (const pattern of temperatureTopPConflictModelPatterns) {
    if (pattern.test(model)) return true;
  }
  return false;
};

View File

@@ -223,3 +223,17 @@ export const buildAnthropicTools = (
}),
);
};
/**
 * Build the Anthropic server-side web search tool definition.
 *
 * The optional `ANTHROPIC_MAX_USES` environment variable caps how many
 * searches a single request may perform; it is only applied when it parses
 * to a positive integer, otherwise `max_uses` is omitted entirely.
 */
export const buildSearchTool = (): Anthropic.WebSearchTool20250305 => {
  const tool: Anthropic.WebSearchTool20250305 = {
    name: 'web_search',
    type: 'web_search_20250305',
  };

  const rawMaxUses = process.env.ANTHROPIC_MAX_USES;
  if (rawMaxUses) {
    const parsed = Number(rawMaxUses);
    // Reject non-numeric, fractional, zero, and negative values.
    if (Number.isInteger(parsed) && parsed > 0) tool.max_uses = parsed;
  }

  return tool;
};

View File

@@ -1,10 +1,7 @@
import { describe, expect, it } from 'vitest';
import {
MODEL_PARAMETER_CONFLICTS,
createParameterResolver,
resolveParameters,
} from './parameterResolver';
import { hasTemperatureTopPConflict } from '../const/models';
import { createParameterResolver, resolveParameters } from './parameterResolver';
describe('resolveParameters', () => {
describe('Basic functionality', () => {
@@ -247,54 +244,41 @@ describe('createParameterResolver', () => {
});
});
describe('MODEL_PARAMETER_CONFLICTS', () => {
describe('ANTHROPIC_CLAUDE_4_PLUS', () => {
it('should contain expected Claude 4+ models', () => {
expect(MODEL_PARAMETER_CONFLICTS.ANTHROPIC_CLAUDE_4_PLUS.has('claude-opus-4-1')).toBe(true);
expect(
MODEL_PARAMETER_CONFLICTS.ANTHROPIC_CLAUDE_4_PLUS.has('claude-opus-4-1-20250805'),
).toBe(true);
expect(
MODEL_PARAMETER_CONFLICTS.ANTHROPIC_CLAUDE_4_PLUS.has('claude-sonnet-4-5-20250929'),
).toBe(true);
describe('hasTemperatureTopPConflict', () => {
describe('Anthropic Claude 4+ models', () => {
it('should return true for Claude 4+ models', () => {
expect(hasTemperatureTopPConflict('claude-opus-4-1-20250805')).toBe(true);
expect(hasTemperatureTopPConflict('claude-sonnet-4-5-20250929')).toBe(true);
expect(hasTemperatureTopPConflict('claude-haiku-4-5-20251001')).toBe(true);
});
it('should not contain Claude 3.x models', () => {
expect(MODEL_PARAMETER_CONFLICTS.ANTHROPIC_CLAUDE_4_PLUS.has('claude-3-opus-20240229')).toBe(
false,
);
expect(
MODEL_PARAMETER_CONFLICTS.ANTHROPIC_CLAUDE_4_PLUS.has('claude-3.5-sonnet-20240620'),
).toBe(false);
it('should return false for Claude 3.x models', () => {
expect(hasTemperatureTopPConflict('claude-3-opus-20240229')).toBe(false);
expect(hasTemperatureTopPConflict('claude-3-5-sonnet-20240620')).toBe(false);
});
});
describe('BEDROCK_CLAUDE_4_PLUS', () => {
it('should contain both standard and Bedrock-specific model IDs', () => {
expect(MODEL_PARAMETER_CONFLICTS.BEDROCK_CLAUDE_4_PLUS.has('claude-opus-4-1')).toBe(true);
expect(
MODEL_PARAMETER_CONFLICTS.BEDROCK_CLAUDE_4_PLUS.has(
'anthropic.claude-opus-4-1-20250805-v1:0',
),
).toBe(true);
expect(
MODEL_PARAMETER_CONFLICTS.BEDROCK_CLAUDE_4_PLUS.has(
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
),
).toBe(true);
describe('OpenRouter Claude 4+ models', () => {
it('should return true for OpenRouter Claude 4+ models', () => {
expect(hasTemperatureTopPConflict('anthropic/claude-opus-4.5')).toBe(true);
expect(hasTemperatureTopPConflict('anthropic/claude-sonnet-4.1')).toBe(true);
expect(hasTemperatureTopPConflict('anthropic/claude-4.5-opus')).toBe(true);
});
it('should contain all Bedrock regional variants', () => {
expect(
MODEL_PARAMETER_CONFLICTS.BEDROCK_CLAUDE_4_PLUS.has(
'anthropic.claude-opus-4-20250514-v1:0',
),
).toBe(true);
expect(
MODEL_PARAMETER_CONFLICTS.BEDROCK_CLAUDE_4_PLUS.has(
'us.anthropic.claude-opus-4-20250514-v1:0',
),
).toBe(true);
it('should return false for OpenRouter Claude 3.x models', () => {
expect(hasTemperatureTopPConflict('anthropic/claude-3.5-sonnet')).toBe(false);
expect(hasTemperatureTopPConflict('anthropic/claude-3.7-sonnet')).toBe(false);
});
});
describe('Bedrock Claude 4+ models', () => {
it('should return true for Bedrock Claude 4+ models', () => {
expect(hasTemperatureTopPConflict('anthropic.claude-opus-4-1-20250805-v1:0')).toBe(true);
expect(hasTemperatureTopPConflict('us.anthropic.claude-sonnet-4-5-20250929-v1:0')).toBe(true);
});
it('should return false for Bedrock Claude 3.x models', () => {
expect(hasTemperatureTopPConflict('anthropic.claude-3-5-sonnet-20240620-v1:0')).toBe(false);
});
});
});

View File

@@ -239,44 +239,3 @@ export const createParameterResolver = (options: ParameterResolverOptions) => {
return resolveParameters(config, options);
};
};
/**
 * Common model sets that have parameter conflicts
 *
 * NOTE(review): this diff replaces these Sets with regex-based matching via
 * `hasTemperatureTopPConflict`; confirm no callers remain before relying on
 * this object.
 */
export const MODEL_PARAMETER_CONFLICTS = {
/**
* Claude models after Opus 4.1 that don't allow both temperature and top_p
*/
ANTHROPIC_CLAUDE_4_PLUS: new Set([
'claude-opus-4-1',
'claude-opus-4-1-20250805',
'claude-sonnet-4-5-20250929',
'claude-haiku-4-5-20251001',
'claude-opus-4-5-20251101',
]),
/**
* Bedrock Claude 4+ models (including Bedrock-specific model IDs)
*/
BEDROCK_CLAUDE_4_PLUS: new Set([
'claude-opus-4-1',
'claude-opus-4-1-20250805',
'claude-opus-4-20250514',
'claude-sonnet-4-20250514',
'claude-sonnet-4-5-20250929',
'claude-haiku-4-5-20251001',
// Bedrock model IDs (plain, `us.`-regional, and `global.` routing variants)
'anthropic.claude-opus-4-1-20250805-v1:0',
'us.anthropic.claude-opus-4-1-20250805-v1:0',
'anthropic.claude-opus-4-20250514-v1:0',
'us.anthropic.claude-opus-4-20250514-v1:0',
'anthropic.claude-sonnet-4-20250514-v1:0',
'us.anthropic.claude-sonnet-4-20250514-v1:0',
'anthropic.claude-sonnet-4-5-20250929-v1:0',
'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
'anthropic.claude-haiku-4-5-20251001-v1:0',
'us.anthropic.claude-haiku-4-5-20251001-v1:0',
'global.anthropic.claude-opus-4-5-20251101-v1:0',
'anthropic.claude-opus-4-5-20251101-v1:0',
]),
};

View File

@@ -1,3 +1,4 @@
export * from './const/models';
export * from './core/BaseAI';
export { pruneReasoningPayload } from './core/contextBuilders/openai';
export { ModelRuntime } from './core/ModelRuntime';

View File

@@ -1,9 +1,14 @@
import Anthropic, { ClientOptions } from '@anthropic-ai/sdk';
import { ModelProvider } from 'model-bank';
import { hasTemperatureTopPConflict } from '../../const/models';
import { LobeRuntimeAI } from '../../core/BaseAI';
import { buildAnthropicMessages, buildAnthropicTools } from '../../core/contextBuilders/anthropic';
import { MODEL_PARAMETER_CONFLICTS, resolveParameters } from '../../core/parameterResolver';
import {
buildAnthropicMessages,
buildAnthropicTools,
buildSearchTool,
} from '../../core/contextBuilders/anthropic';
import { resolveParameters } from '../../core/parameterResolver';
import { AnthropicStream } from '../../core/streams';
import {
type ChatCompletionErrorPayload,
@@ -22,6 +27,7 @@ import { StreamingResponse } from '../../utils/response';
import { createAnthropicGenerateObject } from './generateObject';
import { handleAnthropicError } from './handleAnthropicError';
import { resolveCacheTTL } from './resolveCacheTTL';
import { resolveMaxTokens } from './resolveMaxTokens';
export interface AnthropicModelCard {
created_at: string;
@@ -31,8 +37,6 @@ export interface AnthropicModelCard {
type anthropicTools = Anthropic.Tool | Anthropic.WebSearchTool20250305;
const modelsWithSmallContextWindow = new Set(['claude-3-opus-20240229', 'claude-3-haiku-20240307']);
const DEFAULT_BASE_URL = 'https://api.anthropic.com';
interface AnthropicAIParams extends ClientOptions {
@@ -140,15 +144,13 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
} = payload;
const { anthropic: anthropicModels } = await import('model-bank');
const modelConfig = anthropicModels.find((m) => m.id === model);
const defaultMaxOutput = modelConfig?.maxOutput;
// 配置优先级:用户设置 > 模型配置 > 硬编码默认值
const getMaxTokens = () => {
if (max_tokens) return max_tokens;
if (defaultMaxOutput) return defaultMaxOutput;
return undefined;
};
const resolvedMaxTokens = await resolveMaxTokens({
max_tokens,
model,
providerModels: anthropicModels,
thinking,
});
const system_message = messages.find((m) => m.role === 'system');
const user_messages = messages.filter((m) => m.role !== 'system');
@@ -170,20 +172,8 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
});
if (enabledSearch) {
// Limit the number of searches per request
const maxUses = process.env.ANTHROPIC_MAX_USES;
const webSearchTool = buildSearchTool();
const webSearchTool: Anthropic.WebSearchTool20250305 = {
name: 'web_search',
type: 'web_search_20250305',
...(maxUses &&
Number.isInteger(Number(maxUses)) &&
Number(maxUses) > 0 && {
max_uses: Number(maxUses),
}),
};
// If tools already exist, append the web search tool to them; otherwise start a new tool list
if (postTools && postTools.length > 0) {
postTools = [...postTools, webSearchTool];
} else {
@@ -192,19 +182,17 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
}
if (!!thinking && thinking.type === 'enabled') {
const maxTokens = getMaxTokens() || 32_000; // Claude Opus 4 has minimum maxOutput
// `temperature` may only be set to 1 when thinking is enabled.
// `top_p` must be unset when thinking is enabled.
return {
max_tokens: maxTokens,
max_tokens: resolvedMaxTokens,
messages: postMessages,
model,
system: systemPrompts,
thinking: {
...thinking,
budget_tokens: thinking?.budget_tokens
? Math.min(thinking.budget_tokens, maxTokens - 1) // `max_tokens` must be greater than `thinking.budget_tokens`.
? Math.min(thinking.budget_tokens, resolvedMaxTokens - 1) // `max_tokens` must be greater than `thinking.budget_tokens`.
: 1024,
},
tools: postTools,
@@ -212,7 +200,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
}
// Resolve temperature and top_p parameters based on model constraints
const hasConflict = MODEL_PARAMETER_CONFLICTS.ANTHROPIC_CLAUDE_4_PLUS.has(model);
const hasConflict = hasTemperatureTopPConflict(model);
const resolvedParams = resolveParameters(
{ temperature, top_p },
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
@@ -221,7 +209,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
return {
// claude 3 series model has max output token of 4096, 3.x series has 8192
// https://docs.anthropic.com/en/docs/about-claude/models/all-models#:~:text=200K-,Max%20output,-Normal%3A
max_tokens: getMaxTokens() || (modelsWithSmallContextWindow.has(model) ? 4096 : 8192),
max_tokens: resolvedMaxTokens,
messages: postMessages,
model,
system: systemPrompts,

View File

@@ -0,0 +1,35 @@
import type { ChatStreamPayload } from '../../types';
// Claude models whose context window only allows a small max output
// (4096 tokens); everything else falls back to 8192.
const smallContextWindowPatterns = [
  /claude-3-opus-20240229/,
  /claude-3-haiku-20240307/,
  /claude-v2(:1)?$/,
];

/**
 * Resolve the max_tokens value so Anthropic and Bedrock behave the same way.
 *
 * Priority: explicit user input > model-bank default `maxOutput` > hardcoded
 * fallback (32k when thinking is enabled, otherwise context-window aware).
 */
export const resolveMaxTokens = async ({
  max_tokens,
  model,
  thinking,
  providerModels,
}: {
  max_tokens?: number;
  model: string;
  providerModels: { id: string; maxOutput?: number }[];
  thinking?: ChatStreamPayload['thinking'];
}) => {
  // Explicit user value wins, then the model's declared output limit.
  const modelEntry = providerModels.find((entry) => entry.id === model);
  const explicit = max_tokens ?? modelEntry?.maxOutput;
  if (explicit) return explicit;

  // Thinking needs generous headroom for the reasoning budget.
  if (thinking?.type === 'enabled') return 32_000;

  // Older Claude models are capped at 4096 output tokens.
  for (const pattern of smallContextWindowPatterns) {
    if (pattern.test(model)) return 4096;
  }
  return 8192;
};

View File

@@ -10,8 +10,6 @@ import { AgentRuntimeErrorType } from '../../types/error';
import * as debugStreamModule from '../../utils/debugStream';
import { LobeBedrockAI, experimental_buildLlama2Prompt } from './index';
const provider = 'bedrock';
// Mock the console.error to avoid polluting test output
vi.spyOn(console, 'error').mockImplementation(() => {});
@@ -478,7 +476,7 @@ describe('LobeBedrockAI', () => {
accept: 'application/json',
body: JSON.stringify({
anthropic_version: 'bedrock-2023-05-31',
max_tokens: 4096,
max_tokens: 8192,
messages: [
{
content: [
@@ -521,7 +519,7 @@ describe('LobeBedrockAI', () => {
accept: 'application/json',
body: JSON.stringify({
anthropic_version: 'bedrock-2023-05-31',
max_tokens: 4096,
max_tokens: 8192,
messages: [
{
content: [
@@ -565,7 +563,7 @@ describe('LobeBedrockAI', () => {
accept: 'application/json',
body: JSON.stringify({
anthropic_version: 'bedrock-2023-05-31',
max_tokens: 4096,
max_tokens: 8192,
messages: [
{
content: [
@@ -610,7 +608,7 @@ describe('LobeBedrockAI', () => {
accept: 'application/json',
body: JSON.stringify({
anthropic_version: 'bedrock-2023-05-31',
max_tokens: 4096,
max_tokens: 64_000,
messages: [
{
content: [
@@ -654,7 +652,7 @@ describe('LobeBedrockAI', () => {
accept: 'application/json',
body: JSON.stringify({
anthropic_version: 'bedrock-2023-05-31',
max_tokens: 4096,
max_tokens: 8192,
messages: [
{
content: [

View File

@@ -1,3 +1,4 @@
import type Anthropic from '@anthropic-ai/sdk';
import {
BedrockRuntimeClient,
InvokeModelCommand,
@@ -5,9 +6,10 @@ import {
} from '@aws-sdk/client-bedrock-runtime';
import { ModelProvider } from 'model-bank';
import { hasTemperatureTopPConflict } from '../../const/models';
import { LobeRuntimeAI } from '../../core/BaseAI';
import { buildAnthropicMessages, buildAnthropicTools } from '../../core/contextBuilders/anthropic';
import { MODEL_PARAMETER_CONFLICTS, resolveParameters } from '../../core/parameterResolver';
import { resolveParameters } from '../../core/parameterResolver';
import {
AWSBedrockClaudeStream,
AWSBedrockLlamaStream,
@@ -26,6 +28,7 @@ import { debugStream } from '../../utils/debugStream';
import { getModelPricing } from '../../utils/getModelPricing';
import { StreamingResponse } from '../../utils/response';
import { resolveCacheTTL } from '../anthropic/resolveCacheTTL';
import { resolveMaxTokens } from '../anthropic/resolveMaxTokens';
/**
* A prompt constructor for HuggingFace LLama 2 chat models.
@@ -62,19 +65,24 @@ export function experimental_buildLlama2Prompt(messages: { content: string; role
export interface LobeBedrockAIParams {
accessKeyId?: string;
accessKeySecret?: string;
id?: string;
region?: string;
sessionToken?: string;
}
export class LobeBedrockAI implements LobeRuntimeAI {
private client: BedrockRuntimeClient;
private id: string;
region: string;
constructor({ region, accessKeyId, accessKeySecret, sessionToken }: LobeBedrockAIParams = {}) {
constructor(options: LobeBedrockAIParams = {}) {
const { id, region, accessKeyId, accessKeySecret, sessionToken } = options;
if (!(accessKeyId && accessKeySecret))
throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidBedrockCredentials);
this.region = region ?? 'us-east-1';
this.id = id ?? ModelProvider.Bedrock;
this.client = new BedrockRuntimeClient({
credentials: {
accessKeyId: accessKeyId,
@@ -158,18 +166,28 @@ export class LobeBedrockAI implements LobeRuntimeAI {
temperature,
top_p,
tools,
thinking,
} = payload;
const inputStartAt = Date.now();
const system_message = messages.find((m) => m.role === 'system');
const user_messages = messages.filter((m) => m.role !== 'system');
// Resolve temperature and top_p parameters based on model constraints
const hasConflict = MODEL_PARAMETER_CONFLICTS.BEDROCK_CLAUDE_4_PLUS.has(model);
const hasConflict = hasTemperatureTopPConflict(model);
const resolvedParams = resolveParameters(
{ temperature, top_p },
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
);
const { bedrock: bedrockModels } = await import('model-bank');
const resolvedMaxTokens = await resolveMaxTokens({
max_tokens,
model,
providerModels: bedrockModels,
thinking,
});
const systemPrompts = !!system_message?.content
? ([
{
@@ -177,19 +195,40 @@ export class LobeBedrockAI implements LobeRuntimeAI {
text: system_message.content as string,
type: 'text',
},
] as any)
] as Anthropic.TextBlockParam[])
: undefined;
const anthropicPayload = {
const postTools = buildAnthropicTools(tools, {
enabledContextCaching,
});
const anthropicBase = {
anthropic_version: 'bedrock-2023-05-31',
max_tokens: max_tokens || 4096,
max_tokens: resolvedMaxTokens,
messages: await buildAnthropicMessages(user_messages, { enabledContextCaching }),
system: systemPrompts,
temperature: resolvedParams.temperature,
tools: buildAnthropicTools(tools, { enabledContextCaching }),
top_p: resolvedParams.top_p,
tools: postTools,
};
const anthropicPayload =
thinking?.type === 'enabled'
? {
...anthropicBase,
thinking: {
...thinking,
// `max_tokens` must be greater than `budget_tokens`
budget_tokens: Math.max(
1,
Math.min(thinking.budget_tokens || 1024, resolvedMaxTokens - 1),
),
},
}
: {
...anthropicBase,
temperature: resolvedParams.temperature,
top_p: resolvedParams.top_p,
};
const command = new InvokeModelWithResponseStreamCommand({
accept: 'application/json',
body: JSON.stringify(anthropicPayload),
@@ -209,7 +248,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
debugStream(debug).catch(console.error);
}
const pricing = await getModelPricing(payload.model, ModelProvider.Bedrock);
const pricing = await getModelPricing(payload.model, this.id);
const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicPayload);
const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
@@ -218,7 +257,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
AWSBedrockClaudeStream(prod, {
callbacks: options?.callback,
inputStartAt,
payload: { model, pricing, pricingOptions, provider: ModelProvider.Bedrock },
payload: { model, pricing, pricingOptions, provider: this.id },
}),
{
headers: options?.headers,

View File

@@ -1,4 +1,5 @@
import { contextCachingModels, thinkingWithToolClaudeModels } from '@/const/models';
import { isContextCachingModel, isThinkingWithToolClaudeModel } from '@lobechat/model-runtime';
import { DEFAULT_AGENT_CHAT_CONFIG, DEFAULT_AGENT_SEARCH_FC_MODEL } from '@/const/settings';
import { AgentStoreState } from '@/store/agent/initialState';
import { LobeAgentChatConfig } from '@/types/agent';
@@ -24,12 +25,12 @@ const enableHistoryCount = (s: AgentStoreState) => {
// If context caching is enabled and the current model supports it, do not limit history count
const enableContextCaching = !chatConfig.disableContextCaching;
if (enableContextCaching && contextCachingModels.has(config.model)) return false;
if (enableContextCaching && isContextCachingModel(config.model)) return false;
// When search is enabled, do not limit history count for Claude thinking-with-tool models (3.7+)
const enableSearch = isAgentEnableSearch(s);
if (enableSearch && thinkingWithToolClaudeModels.has(config.model)) return false;
if (enableSearch && isThinkingWithToolClaudeModel(config.model)) return false;
return chatConfig.enableHistoryCount;
};