💄 style(model-runtime): add Claude Opus 4.6 support for Bedrock runtime (#12155)

* feat(model-runtime): add Claude Opus 4.6 support for Bedrock runtime

- Add Opus 4.6 to Bedrock and LobeHub hosted model banks with adaptive thinking and effort config
- Sync Bedrock runtime to support adaptive thinking (type: 'adaptive') and output_config.effort
- Strip assistant turn prefill for Opus 4.6 (not supported)
- Add missing search ability to Opus 4.5, Sonnet 4.5, Haiku 4.5 and 3.7 Sonnet in Bedrock
- Fix single-quote string escaping issues in Bedrock model descriptions

* 🐛 fix(model-runtime): clamp default thinking budget_tokens against max_tokens

* 🐛 fix(model-runtime): remove unnecessary 'as any' for adaptive thinking type

* 💄 style: update planCardModels to latest model lineup
This commit is contained in:
YuTengjing
2026-02-06 18:35:04 +08:00
committed by GitHub
parent f628564acf
commit 90a75af669
6 changed files with 119 additions and 36 deletions

View File

@@ -5,12 +5,47 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
'Claude Opus 4.5 is Anthropics flagship model, combining exceptional intelligence and scalable performance for complex tasks requiring the highest-quality responses and reasoning.',
"Claude Opus 4.6 is Anthropic's most intelligent model for building agents and coding.",
displayName: 'Claude Opus 4.6',
enabled: true,
id: 'us.anthropic.claude-opus-4-6-v1',
maxOutput: 128_000,
pricing: {
units: [
{ name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 25, strategy: 'fixed', unit: 'millionTokens' },
{
lookup: { prices: { '1h': 10, '5m': 6.25 }, pricingParams: ['ttl'] },
name: 'textInput_cacheWrite',
strategy: 'lookup',
unit: 'millionTokens',
},
],
},
releasedAt: '2026-02-05',
settings: {
extendParams: ['disableContextCaching', 'enableAdaptiveThinking', 'effort'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
"Claude Opus 4.5 is Anthropic's flagship model, combining exceptional intelligence and scalable performance for complex tasks requiring the highest-quality responses and reasoning.",
displayName: 'Claude Opus 4.5',
enabled: true,
id: 'global.anthropic.claude-opus-4-5-20251101-v1:0',
@@ -32,11 +67,12 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description: 'Claude Sonnet 4.5 is Anthropics most intelligent model to date.',
description: "Claude Sonnet 4.5 is Anthropic's most intelligent model to date.",
displayName: 'Claude Sonnet 4.5',
enabled: true,
id: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
@@ -57,12 +93,13 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
'Claude Haiku 4.5 is Anthropics fastest and most intelligent Haiku model, with lightning speed and extended thinking.',
"Claude Haiku 4.5 is Anthropic's fastest and most intelligent Haiku model, with lightning speed and extended thinking.",
displayName: 'Claude Haiku 4.5',
enabled: true,
id: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
@@ -104,12 +141,13 @@ const bedrockChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
'Claude 3.7 Sonnet is Anthropics fastest next-gen model. Compared to Claude 3 Haiku, it improves across skills and surpasses the previous flagship Claude 3 Opus on many intelligence benchmarks.',
"Claude 3.7 Sonnet is Anthropic's fastest next-gen model. Compared to Claude 3 Haiku, it improves across skills and surpasses the previous flagship Claude 3 Opus on many intelligence benchmarks.",
displayName: 'Claude 3.7 Sonnet',
id: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
maxOutput: 64_000,

View File

@@ -91,6 +91,34 @@ export const anthropicChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
vision: true,
},
contextWindowTokens: 200_000,
description:
"Claude Opus 4.6 is Anthropic's most intelligent model for building agents and coding.",
displayName: 'Claude Opus 4.6',
enabled: true,
id: 'claude-opus-4-6',
maxOutput: 128_000,
pricing: {
units: [
{ name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 25, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput_cacheWrite', rate: 6.25, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2026-02-05',
settings: {
extendParams: ['disableContextCaching', 'enableAdaptiveThinking', 'effort'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,

View File

@@ -19,4 +19,4 @@ const LobeHub: ModelProviderCard = {
export default LobeHub;
export const planCardModels = ['gpt-4o-mini', 'deepseek-reasoner', 'claude-3-5-sonnet-latest'];
export const planCardModels = ['claude-sonnet-4-5-20250929', 'gemini-3-pro-preview', 'gpt-5.2', 'deepseek-chat'];

View File

@@ -166,12 +166,13 @@ export const buildDefaultAnthropicPayload = async (
const resolvedThinking: Anthropic.MessageCreateParams['thinking'] =
thinking.type === 'enabled'
? {
budget_tokens: thinking?.budget_tokens
? Math.min(thinking.budget_tokens, resolvedMaxTokens - 1)
: 1024,
budget_tokens: Math.min(
thinking?.budget_tokens || 1024,
resolvedMaxTokens - 1,
),
type: 'enabled',
}
: { type: 'adaptive' as any };
: { type: 'adaptive' };
return {
max_tokens: resolvedMaxTokens,

View File

@@ -754,7 +754,7 @@ describe('LobeAnthropicAI', () => {
],
model: 'claude-3-haiku-20240307',
system: undefined,
thinking: { type: 'enabled', budget_tokens: 1024 },
thinking: { type: 'enabled', budget_tokens: 999 },
tools: undefined,
});
});

View File

@@ -160,6 +160,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
options?: ChatMethodOptions,
): Promise<Response> => {
const {
effort,
enabledContextCaching = true,
max_tokens,
messages,
@@ -173,13 +174,6 @@ export class LobeBedrockAI implements LobeRuntimeAI {
const system_message = messages.find((m) => m.role === 'system');
const user_messages = messages.filter((m) => m.role !== 'system');
// Resolve temperature and top_p parameters based on model constraints
const hasConflict = hasTemperatureTopPConflict(model);
const resolvedParams = resolveParameters(
{ temperature, top_p },
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
);
const { bedrock: bedrockModels } = await import('model-bank');
const resolvedMaxTokens = await resolveMaxTokens({
@@ -203,32 +197,54 @@ export class LobeBedrockAI implements LobeRuntimeAI {
enabledContextCaching,
});
const postMessages = await buildAnthropicMessages(user_messages, { enabledContextCaching });
// Claude Opus 4.6 does not support assistant turn prefill
if (model.includes('opus-4-6') && postMessages.at(-1)?.role === 'assistant') {
postMessages.pop();
}
const anthropicBase = {
anthropic_version: 'bedrock-2023-05-31',
max_tokens: resolvedMaxTokens,
messages: await buildAnthropicMessages(user_messages, { enabledContextCaching }),
messages: postMessages,
system: systemPrompts,
tools: postTools,
};
const anthropicPayload =
thinking?.type === 'enabled'
? {
...anthropicBase,
thinking: {
...thinking,
// `max_tokens` must be greater than `budget_tokens`
budget_tokens: Math.max(
1,
Math.min(thinking.budget_tokens || 1024, resolvedMaxTokens - 1),
let anthropicPayload;
if (!!thinking && (thinking.type === 'enabled' || thinking.type === 'adaptive')) {
const resolvedThinking =
thinking.type === 'enabled'
? {
budget_tokens: Math.min(
thinking?.budget_tokens || 1024,
resolvedMaxTokens - 1,
),
},
}
: {
...anthropicBase,
temperature: resolvedParams.temperature,
top_p: resolvedParams.top_p,
};
type: 'enabled' as const,
}
: { type: 'adaptive' as const };
anthropicPayload = {
...anthropicBase,
...(thinking.type === 'adaptive' && effort ? { output_config: { effort } } : {}),
thinking: resolvedThinking,
};
} else {
// Resolve temperature and top_p parameters based on model constraints
const hasConflict = hasTemperatureTopPConflict(model);
const resolvedParams = resolveParameters(
{ temperature, top_p },
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
);
anthropicPayload = {
...anthropicBase,
temperature: resolvedParams.temperature,
top_p: resolvedParams.top_p,
};
}
const command = new InvokeModelWithResponseStreamCommand({
accept: 'application/json',
@@ -250,7 +266,7 @@ export class LobeBedrockAI implements LobeRuntimeAI {
}
const pricing = await getModelPricing(payload.model, this.id);
const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicPayload);
const cacheTTL = resolveCacheTTL({ ...payload, enabledContextCaching }, anthropicBase);
const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
// Respond with the stream