💄 style(model-runtime): add Claude Opus 4.6 support for Bedrock runtime (#12155)

* feat(model-runtime): add Claude Opus 4.6 support for Bedrock runtime

- Add Opus 4.6 to Bedrock and LobeHub hosted model banks with adaptive thinking and effort config
- Sync Bedrock runtime to support adaptive thinking (type: 'adaptive') and output_config.effort
- Strip assistant turn prefill for Opus 4.6 (not supported)
- Add missing search ability to Opus 4.5, Sonnet 4.5, Haiku 4.5 and 3.7 Sonnet in Bedrock
- Fix single-quote string escaping issues in Bedrock model descriptions

* 🐛 fix(model-runtime): clamp default thinking budget_tokens against max_tokens

* 🐛 fix(model-runtime): remove unnecessary 'as any' for adaptive thinking type

* 💄 style: update planCardModels to latest model lineup
This commit is contained in:
YuTengjing
2026-02-06 18:35:04 +08:00
committed by GitHub
parent f628564acf
commit 90a75af669
6 changed files with 119 additions and 36 deletions

View File

@@ -5,12 +5,47 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
'Claude Opus 4.5 is Anthropics flagship model, combining exceptional intelligence and scalable performance for complex tasks requiring the highest-quality responses and reasoning.',
"Claude Opus 4.6 is Anthropic's most intelligent model for building agents and coding.",
displayName: 'Claude Opus 4.6',
enabled: true,
id: 'us.anthropic.claude-opus-4-6-v1',
maxOutput: 128_000,
pricing: {
units: [
{ name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 25, strategy: 'fixed', unit: 'millionTokens' },
{
lookup: { prices: { '1h': 10, '5m': 6.25 }, pricingParams: ['ttl'] },
name: 'textInput_cacheWrite',
strategy: 'lookup',
unit: 'millionTokens',
},
],
},
releasedAt: '2026-02-05',
settings: {
extendParams: ['disableContextCaching', 'enableAdaptiveThinking', 'effort'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
"Claude Opus 4.5 is Anthropic's flagship model, combining exceptional intelligence and scalable performance for complex tasks requiring the highest-quality responses and reasoning.",
displayName: 'Claude Opus 4.5',
enabled: true,
id: 'global.anthropic.claude-opus-4-5-20251101-v1:0',
@@ -32,11 +67,12 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description: 'Claude Sonnet 4.5 is Anthropics most intelligent model to date.',
description: "Claude Sonnet 4.5 is Anthropic's most intelligent model to date.",
displayName: 'Claude Sonnet 4.5',
enabled: true,
id: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
@@ -57,12 +93,13 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
'Claude Haiku 4.5 is Anthropics fastest and most intelligent Haiku model, with lightning speed and extended thinking.',
"Claude Haiku 4.5 is Anthropic's fastest and most intelligent Haiku model, with lightning speed and extended thinking.",
displayName: 'Claude Haiku 4.5',
enabled: true,
id: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
@@ -104,12 +141,13 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
'Claude 3.7 Sonnet is Anthropics fastest next-gen model. Compared to Claude 3 Haiku, it improves across skills and surpasses the previous flagship Claude 3 Opus on many intelligence benchmarks.',
"Claude 3.7 Sonnet is Anthropic's fastest next-gen model. Compared to Claude 3 Haiku, it improves across skills and surpasses the previous flagship Claude 3 Opus on many intelligence benchmarks.",
displayName: 'Claude 3.7 Sonnet',
id: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
maxOutput: 64_000,

View File

@@ -91,6 +91,34 @@ export const anthropicChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
"Claude Opus 4.6 is Anthropic's most intelligent model for building agents and coding.",
displayName: 'Claude Opus 4.6',
enabled: true,
id: 'claude-opus-4-6',
maxOutput: 128_000,
pricing: {
units: [
{ name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 25, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput_cacheWrite', rate: 6.25, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2026-02-05',
settings: {
extendParams: ['disableContextCaching', 'enableAdaptiveThinking', 'effort'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,

View File

@@ -19,4 +19,4 @@ const LobeHub: ModelProviderCard = {
export default LobeHub;
export const planCardModels = ['gpt-4o-mini', 'deepseek-reasoner', 'claude-3-5-sonnet-latest'];
export const planCardModels = ['claude-sonnet-4-5-20250929', 'gemini-3-pro-preview', 'gpt-5.2', 'deepseek-chat'];

View File

@@ -166,12 +166,13 @@ export const buildDefaultAnthropicPayload = async (
const resolvedThinking: Anthropic.MessageCreateParams['thinking'] =
thinking.type === 'enabled'
? {
budget_tokens: thinking?.budget_tokens
? Math.min(thinking.budget_tokens, resolvedMaxTokens - 1)
: 1024,
budget_tokens: Math.min(
thinking?.budget_tokens || 1024,
resolvedMaxTokens - 1,
),
type: 'enabled',
}
: { type: 'adaptive' as any };
: { type: 'adaptive' };
return {
max_tokens: resolvedMaxTokens,

View File

@@ -754,7 +754,7 @@ describe('LobeAnthropicAI', () => {
],
model: 'claude-3-haiku-20240307',
system: undefined,
thinking: { type: 'enabled', budget_tokens: 1024 },
thinking: { type: 'enabled', budget_tokens: 999 },
tools: undefined,
});
});

View File

@@ -160,6 +160,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
options?: ChatMethodOptions,
): Promise<Response> => {
const {
effort,
enabledContextCaching = true,
max_tokens,
messages,
@@ -173,13 +174,6 @@ export class LobeBedrockAI implements LobeRuntimeAI {
const system_message = messages.find((m) => m.role === 'system');
const user_messages = messages.filter((m) => m.role !== 'system');
// Resolve temperature and top_p parameters based on model constraints
const hasConflict = hasTemperatureTopPConflict(model);
const resolvedParams = resolveParameters(
{ temperature, top_p },
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
);
const { bedrock: bedrockModels } = await import('model-bank');
const resolvedMaxTokens = await resolveMaxTokens({
@@ -203,32 +197,54 @@ export class LobeBedrockAI implements LobeRuntimeAI {
enabledContextCaching,
});
const postMessages = await buildAnthropicMessages(user_messages, { enabledContextCaching });
// Claude Opus 4.6 does not support assistant turn prefill
if (model.includes('opus-4-6') && postMessages.at(-1)?.role === 'assistant') {
postMessages.pop();
}
const anthropicBase = {
anthropic_version: 'bedrock-2023-05-31',
max_tokens: resolvedMaxTokens,
messages: await buildAnthropicMessages(user_messages, { enabledContextCaching }),
messages: postMessages,
system: systemPrompts,
tools: postTools,
};
const anthropicPayload =
thinking?.type === 'enabled'
? {
...anthropicBase,
thinking: {
...thinking,
// `max_tokens` must be greater than `budget_tokens`
budget_tokens: Math.max(
1,
Math.min(thinking.budget_tokens || 1024, resolvedMaxTokens - 1),
let anthropicPayload;
if (!!thinking && (thinking.type === 'enabled' || thinking.type === 'adaptive')) {
const resolvedThinking =
thinking.type === 'enabled'
? {
budget_tokens: Math.min(
thinking?.budget_tokens || 1024,
resolvedMaxTokens - 1,
),
},
}
: {
...anthropicBase,
temperature: resolvedParams.temperature,
top_p: resolvedParams.top_p,
};
type: 'enabled' as const,
}
: { type: 'adaptive' as const };
anthropicPayload = {
...anthropicBase,
...(thinking.type === 'adaptive' && effort ? { output_config: { effort } } : {}),
thinking: resolvedThinking,
};
} else {
// Resolve temperature and top_p parameters based on model constraints
const hasConflict = hasTemperatureTopPConflict(model);
const resolvedParams = resolveParameters(
{ temperature, top_p },
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
);
anthropicPayload = {
...anthropicBase,
temperature: resolvedParams.temperature,
top_p: resolvedParams.top_p,
};
}
const command = new InvokeModelWithResponseStreamCommand({
accept: 'application/json',
@@ -250,7 +266,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
}
const pricing = await getModelPricing(payload.model, this.id);
const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicPayload);
const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicBase);
const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
// Respond with the stream