mirror of
https://github.com/lobehub/lobehub.git
synced 2026-03-27 13:29:15 +07:00
💄 style(model-runtime): add Claude Opus 4.6 support for Bedrock runtime (#12155)
* ✨ feat(model-runtime): add Claude Opus 4.6 support for Bedrock runtime
  - Add Opus 4.6 to Bedrock and LobeHub hosted model banks with adaptive thinking and effort config
  - Sync Bedrock runtime to support adaptive thinking (type: 'adaptive') and output_config.effort
  - Strip assistant turn prefill for Opus 4.6 (not supported)
  - Add missing search ability to Opus 4.5, Sonnet 4.5, Haiku 4.5 and 3.7 Sonnet in Bedrock
  - Fix single-quote string escaping issues in Bedrock model descriptions
* 🐛 fix(model-runtime): clamp default thinking budget_tokens against max_tokens
* 🐛 fix(model-runtime): remove unnecessary 'as any' for adaptive thinking type
* 💄 style: update planCardModels to latest model lineup
This commit is contained in:
@@ -5,12 +5,47 @@ const bedrockChatModels: AIChatModelCard[] = [
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
search: true,
|
||||
structuredOutput: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description:
|
||||
'Claude Opus 4.5 is Anthropic’s flagship model, combining exceptional intelligence and scalable performance for complex tasks requiring the highest-quality responses and reasoning.',
|
||||
"Claude Opus 4.6 is Anthropic's most intelligent model for building agents and coding.",
|
||||
displayName: 'Claude Opus 4.6',
|
||||
enabled: true,
|
||||
id: 'us.anthropic.claude-opus-4-6-v1',
|
||||
maxOutput: 128_000,
|
||||
pricing: {
|
||||
units: [
|
||||
{ name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textInput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textOutput', rate: 25, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{
|
||||
lookup: { prices: { '1h': 10, '5m': 6.25 }, pricingParams: ['ttl'] },
|
||||
name: 'textInput_cacheWrite',
|
||||
strategy: 'lookup',
|
||||
unit: 'millionTokens',
|
||||
},
|
||||
],
|
||||
},
|
||||
releasedAt: '2026-02-05',
|
||||
settings: {
|
||||
extendParams: ['disableContextCaching', 'enableAdaptiveThinking', 'effort'],
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
search: true,
|
||||
structuredOutput: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description:
|
||||
"Claude Opus 4.5 is Anthropic's flagship model, combining exceptional intelligence and scalable performance for complex tasks requiring the highest-quality responses and reasoning.",
|
||||
displayName: 'Claude Opus 4.5',
|
||||
enabled: true,
|
||||
id: 'global.anthropic.claude-opus-4-5-20251101-v1:0',
|
||||
@@ -32,11 +67,12 @@ const bedrockChatModels: AIChatModelCard[] = [
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
search: true,
|
||||
structuredOutput: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description: 'Claude Sonnet 4.5 is Anthropic’s most intelligent model to date.',
|
||||
description: "Claude Sonnet 4.5 is Anthropic's most intelligent model to date.",
|
||||
displayName: 'Claude Sonnet 4.5',
|
||||
enabled: true,
|
||||
id: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
|
||||
@@ -57,12 +93,13 @@ const bedrockChatModels: AIChatModelCard[] = [
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
search: true,
|
||||
structuredOutput: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description:
|
||||
'Claude Haiku 4.5 is Anthropic’s fastest and most intelligent Haiku model, with lightning speed and extended thinking.',
|
||||
"Claude Haiku 4.5 is Anthropic's fastest and most intelligent Haiku model, with lightning speed and extended thinking.",
|
||||
displayName: 'Claude Haiku 4.5',
|
||||
enabled: true,
|
||||
id: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
|
||||
@@ -104,12 +141,13 @@ const bedrockChatModels: AIChatModelCard[] = [
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
search: true,
|
||||
structuredOutput: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description:
|
||||
'Claude 3.7 Sonnet is Anthropic’s fastest next-gen model. Compared to Claude 3 Haiku, it improves across skills and surpasses the previous flagship Claude 3 Opus on many intelligence benchmarks.',
|
||||
"Claude 3.7 Sonnet is Anthropic's fastest next-gen model. Compared to Claude 3 Haiku, it improves across skills and surpasses the previous flagship Claude 3 Opus on many intelligence benchmarks.",
|
||||
displayName: 'Claude 3.7 Sonnet',
|
||||
id: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
|
||||
maxOutput: 64_000,
|
||||
|
||||
@@ -91,6 +91,34 @@ export const anthropicChatModels: AIChatModelCard[] = [
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
search: true,
|
||||
vision: true,
|
||||
},
|
||||
contextWindowTokens: 200_000,
|
||||
description:
|
||||
"Claude Opus 4.6 is Anthropic's most intelligent model for building agents and coding.",
|
||||
displayName: 'Claude Opus 4.6',
|
||||
enabled: true,
|
||||
id: 'claude-opus-4-6',
|
||||
maxOutput: 128_000,
|
||||
pricing: {
|
||||
units: [
|
||||
{ name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textInput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textOutput', rate: 25, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textInput_cacheWrite', rate: 6.25, strategy: 'fixed', unit: 'millionTokens' },
|
||||
],
|
||||
},
|
||||
releasedAt: '2026-02-05',
|
||||
settings: {
|
||||
extendParams: ['disableContextCaching', 'enableAdaptiveThinking', 'effort'],
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
|
||||
@@ -19,4 +19,4 @@ const LobeHub: ModelProviderCard = {
|
||||
|
||||
export default LobeHub;
|
||||
|
||||
export const planCardModels = ['gpt-4o-mini', 'deepseek-reasoner', 'claude-3-5-sonnet-latest'];
|
||||
export const planCardModels = ['claude-sonnet-4-5-20250929', 'gemini-3-pro-preview', 'gpt-5.2', 'deepseek-chat'];
|
||||
|
||||
@@ -166,12 +166,13 @@ export const buildDefaultAnthropicPayload = async (
|
||||
const resolvedThinking: Anthropic.MessageCreateParams['thinking'] =
|
||||
thinking.type === 'enabled'
|
||||
? {
|
||||
budget_tokens: thinking?.budget_tokens
|
||||
? Math.min(thinking.budget_tokens, resolvedMaxTokens - 1)
|
||||
: 1024,
|
||||
budget_tokens: Math.min(
|
||||
thinking?.budget_tokens || 1024,
|
||||
resolvedMaxTokens - 1,
|
||||
),
|
||||
type: 'enabled',
|
||||
}
|
||||
: { type: 'adaptive' as any };
|
||||
: { type: 'adaptive' };
|
||||
|
||||
return {
|
||||
max_tokens: resolvedMaxTokens,
|
||||
|
||||
@@ -754,7 +754,7 @@ describe('LobeAnthropicAI', () => {
|
||||
],
|
||||
model: 'claude-3-haiku-20240307',
|
||||
system: undefined,
|
||||
thinking: { type: 'enabled', budget_tokens: 1024 },
|
||||
thinking: { type: 'enabled', budget_tokens: 999 },
|
||||
tools: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -160,6 +160,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
|
||||
options?: ChatMethodOptions,
|
||||
): Promise<Response> => {
|
||||
const {
|
||||
effort,
|
||||
enabledContextCaching = true,
|
||||
max_tokens,
|
||||
messages,
|
||||
@@ -173,13 +174,6 @@ export class LobeBedrockAI implements LobeRuntimeAI {
|
||||
const system_message = messages.find((m) => m.role === 'system');
|
||||
const user_messages = messages.filter((m) => m.role !== 'system');
|
||||
|
||||
// Resolve temperature and top_p parameters based on model constraints
|
||||
const hasConflict = hasTemperatureTopPConflict(model);
|
||||
const resolvedParams = resolveParameters(
|
||||
{ temperature, top_p },
|
||||
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
|
||||
);
|
||||
|
||||
const { bedrock: bedrockModels } = await import('model-bank');
|
||||
|
||||
const resolvedMaxTokens = await resolveMaxTokens({
|
||||
@@ -203,32 +197,54 @@ export class LobeBedrockAI implements LobeRuntimeAI {
|
||||
enabledContextCaching,
|
||||
});
|
||||
|
||||
const postMessages = await buildAnthropicMessages(user_messages, { enabledContextCaching });
|
||||
|
||||
// Claude Opus 4.6 does not support assistant turn prefill
|
||||
if (model.includes('opus-4-6') && postMessages.at(-1)?.role === 'assistant') {
|
||||
postMessages.pop();
|
||||
}
|
||||
|
||||
const anthropicBase = {
|
||||
anthropic_version: 'bedrock-2023-05-31',
|
||||
max_tokens: resolvedMaxTokens,
|
||||
messages: await buildAnthropicMessages(user_messages, { enabledContextCaching }),
|
||||
messages: postMessages,
|
||||
system: systemPrompts,
|
||||
tools: postTools,
|
||||
};
|
||||
|
||||
const anthropicPayload =
|
||||
thinking?.type === 'enabled'
|
||||
? {
|
||||
...anthropicBase,
|
||||
thinking: {
|
||||
...thinking,
|
||||
// `max_tokens` must be greater than `budget_tokens`
|
||||
budget_tokens: Math.max(
|
||||
1,
|
||||
Math.min(thinking.budget_tokens || 1024, resolvedMaxTokens - 1),
|
||||
let anthropicPayload;
|
||||
|
||||
if (!!thinking && (thinking.type === 'enabled' || thinking.type === 'adaptive')) {
|
||||
const resolvedThinking =
|
||||
thinking.type === 'enabled'
|
||||
? {
|
||||
budget_tokens: Math.min(
|
||||
thinking?.budget_tokens || 1024,
|
||||
resolvedMaxTokens - 1,
|
||||
),
|
||||
},
|
||||
}
|
||||
: {
|
||||
...anthropicBase,
|
||||
temperature: resolvedParams.temperature,
|
||||
top_p: resolvedParams.top_p,
|
||||
};
|
||||
type: 'enabled' as const,
|
||||
}
|
||||
: { type: 'adaptive' as const };
|
||||
|
||||
anthropicPayload = {
|
||||
...anthropicBase,
|
||||
...(thinking.type === 'adaptive' && effort ? { output_config: { effort } } : {}),
|
||||
thinking: resolvedThinking,
|
||||
};
|
||||
} else {
|
||||
// Resolve temperature and top_p parameters based on model constraints
|
||||
const hasConflict = hasTemperatureTopPConflict(model);
|
||||
const resolvedParams = resolveParameters(
|
||||
{ temperature, top_p },
|
||||
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
|
||||
);
|
||||
|
||||
anthropicPayload = {
|
||||
...anthropicBase,
|
||||
temperature: resolvedParams.temperature,
|
||||
top_p: resolvedParams.top_p,
|
||||
};
|
||||
}
|
||||
|
||||
const command = new InvokeModelWithResponseStreamCommand({
|
||||
accept: 'application/json',
|
||||
@@ -250,7 +266,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
|
||||
}
|
||||
|
||||
const pricing = await getModelPricing(payload.model, this.id);
|
||||
const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicPayload);
|
||||
const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicBase);
|
||||
const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
|
||||
|
||||
// Respond with the stream
|
||||
|
||||
Reference in New Issue
Block a user