💄 style: update Grok 4.20 to 0309 and add MiniMax M2.7 models (#13112)

2026-03-26 13:19:34 +07:00 · 2026-03-19 00:05:07 +08:00
parent 465c9699e7
commit 25e1a64c1b
6 changed files with 226 additions and 28 deletions
--- a/packages/model-bank/src/aiModels/lobehub/chat/minimax.ts
+++ b/packages/model-bank/src/aiModels/lobehub/chat/minimax.ts
@@ -1,6 +1,51 @@
 import type { AIChatModelCard } from '../../../types/aiModel';

 export const minimaxChatModels: AIChatModelCard[] = [
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 204_800,
+    description:
+      'First self-evolving model with top-tier coding and agentic performance (~60 tps).',
+    displayName: 'MiniMax M2.7',
+    enabled: true,
+    id: 'MiniMax-M2.7',
+    maxOutput: 131_072,
+    pricing: {
+      units: [
+        { name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput_cacheWrite', rate: 0.375, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput_cacheRead', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 1.2, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    releasedAt: '2026-03-18',
+    type: 'chat',
+  },
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 204_800,
+    description: 'Same performance as M2.7 with significantly faster inference (~100 tps).',
+    displayName: 'MiniMax M2.7 Highspeed',
+    enabled: true,
+    id: 'MiniMax-M2.7-highspeed',
+    maxOutput: 131_072,
+    pricing: {
+      units: [
+        { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput_cacheWrite', rate: 0.375, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput_cacheRead', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    releasedAt: '2026-03-18',
+    type: 'chat',
+  },
  {
    abilities: {
      functionCall: true,
@@ -55,7 +100,6 @@ export const minimaxChatModels: AIChatModelCard[] = [
    description:
      'Powerful multilingual programming capabilities, comprehensively upgraded programming experience.',
    displayName: 'MiniMax M2.1',
-    enabled: true,
    id: 'MiniMax-M2.1',
    maxOutput: 131_072,
    pricing: {
@@ -78,7 +122,6 @@ export const minimaxChatModels: AIChatModelCard[] = [
    description:
      'Powerful multilingual programming capabilities with faster and more efficient inference.',
    displayName: 'MiniMax M2.1 Highspeed',
-    enabled: true,
    id: 'MiniMax-M2.1-highspeed',
    maxOutput: 131_072,
    pricing: {
--- a/packages/model-bank/src/aiModels/lobehub/chat/xai.ts
+++ b/packages/model-bank/src/aiModels/lobehub/chat/xai.ts
@@ -1,6 +1,103 @@
 import type { AIChatModelCard } from '../../../types/aiModel';

 export const xaiChatModels: AIChatModelCard[] = [
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+      search: true,
+      vision: true,
+    },
+    contextWindowTokens: 2_000_000,
+    description: 'Intelligent, blazing-fast model that reasons before responding',
+    displayName: 'Grok 4.20 Beta',
+    enabled: true,
+    id: 'grok-4.20-beta-0309-reasoning',
+    pricing: {
+      units: [
+        {
+          name: 'textInput_cacheRead',
+          strategy: 'tiered',
+          tiers: [
+            { rate: 0.2, upTo: 0.2 },
+            { rate: 0.4, upTo: 'infinity' },
+          ],
+          unit: 'millionTokens',
+        },
+        {
+          name: 'textInput',
+          strategy: 'tiered',
+          tiers: [
+            { rate: 2, upTo: 0.2 },
+            { rate: 4, upTo: 'infinity' },
+          ],
+          unit: 'millionTokens',
+        },
+        {
+          name: 'textOutput',
+          strategy: 'tiered',
+          tiers: [
+            { rate: 6, upTo: 0.2 },
+            { rate: 12, upTo: 'infinity' },
+          ],
+          unit: 'millionTokens',
+        },
+      ],
+    },
+    releasedAt: '2026-03-09',
+    settings: {
+      searchImpl: 'params',
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      functionCall: true,
+      search: true,
+      vision: true,
+    },
+    contextWindowTokens: 2_000_000,
+    description: 'A non-reasoning variant for simple use cases',
+    displayName: 'Grok 4.20 Beta (Non-Reasoning)',
+    enabled: true,
+    id: 'grok-4.20-beta-0309-non-reasoning',
+    pricing: {
+      units: [
+        {
+          name: 'textInput_cacheRead',
+          strategy: 'tiered',
+          tiers: [
+            { rate: 0.2, upTo: 0.2 },
+            { rate: 0.4, upTo: 'infinity' },
+          ],
+          unit: 'millionTokens',
+        },
+        {
+          name: 'textInput',
+          strategy: 'tiered',
+          tiers: [
+            { rate: 2, upTo: 0.2 },
+            { rate: 4, upTo: 'infinity' },
+          ],
+          unit: 'millionTokens',
+        },
+        {
+          name: 'textOutput',
+          strategy: 'tiered',
+          tiers: [
+            { rate: 6, upTo: 0.2 },
+            { rate: 12, upTo: 'infinity' },
+          ],
+          unit: 'millionTokens',
+        },
+      ],
+    },
+    releasedAt: '2026-03-09',
+    settings: {
+      searchImpl: 'params',
+    },
+    type: 'chat',
+  },
  {
    abilities: {
      functionCall: true,
--- a/packages/model-bank/src/aiModels/minimax.ts
+++ b/packages/model-bank/src/aiModels/minimax.ts
@@ -1,6 +1,52 @@
 import { type AIChatModelCard, type AIImageModelCard } from '../types/aiModel';

 const minimaxChatModels: AIChatModelCard[] = [
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 204_800,
+    description:
+      'First self-evolving model with top-tier coding and agentic performance (~60 tps).',
+    displayName: 'MiniMax M2.7',
+    enabled: true,
+    id: 'MiniMax-M2.7',
+    maxOutput: 131_072,
+    pricing: {
+      currency: 'CNY',
+      units: [
+        { name: 'textInput_cacheRead', rate: 0.42, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput_cacheWrite', rate: 2.625, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 8.4, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    releasedAt: '2026-03-18',
+    type: 'chat',
+  },
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 204_800,
+    description: 'Same performance as M2.7 with significantly faster inference (~100 tps).',
+    displayName: 'MiniMax M2.7 Highspeed',
+    id: 'MiniMax-M2.7-highspeed',
+    maxOutput: 131_072,
+    pricing: {
+      currency: 'CNY',
+      units: [
+        { name: 'textInput_cacheRead', rate: 0.42, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput_cacheWrite', rate: 2.625, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textInput', rate: 4.2, strategy: 'fixed', unit: 'millionTokens' },
+        { name: 'textOutput', rate: 16.8, strategy: 'fixed', unit: 'millionTokens' },
+      ],
+    },
+    releasedAt: '2026-03-18',
+    type: 'chat',
+  },
  {
    abilities: {
      functionCall: true,
--- a/packages/model-bank/src/aiModels/xai.ts
+++ b/packages/model-bank/src/aiModels/xai.ts
@@ -11,11 +11,10 @@ const xaiChatModels: AIChatModelCard[] = [
      vision: true,
    },
    contextWindowTokens: 2_000_000,
-    description:
-      'A team of 4 or 16 agents, Excels at research use cases, Does not currently support client-side tools. Only supports xAI server side tools (eg X Search, Web Search tools) and remote MCP tools.',
-    displayName: 'Grok 4.20 Multi-Agent Experimental Beta',
+    description: 'Intelligent, blazing-fast model that reasons before responding',
+    displayName: 'Grok 4.20 Beta',
    enabled: true,
-    id: 'grok-4.20-multi-agent-experimental-beta-0304',
+    id: 'grok-4.20-beta-0309-reasoning',
    pricing: {
      units: [
        {
@@ -47,9 +46,8 @@ const xaiChatModels: AIChatModelCard[] = [
        },
      ],
    },
-    releasedAt: '2026-03-04',
+    releasedAt: '2026-03-09',
    settings: {
-      extendParams: ['grok4_20ReasoningEffort'],
      searchImpl: 'params',
    },
    type: 'chat',
@@ -63,9 +61,9 @@ const xaiChatModels: AIChatModelCard[] = [
    },
    contextWindowTokens: 2_000_000,
    description: 'A non-reasoning variant for simple use cases',
-    displayName: 'Grok 4.20 Experimental Beta (Non-Reasoning)',
+    displayName: 'Grok 4.20 Beta (Non-Reasoning)',
    enabled: true,
-    id: 'grok-4.20-experimental-beta-0304-non-reasoning',
+    id: 'grok-4.20-beta-0309-non-reasoning',
    pricing: {
      units: [
        {
@@ -97,7 +95,7 @@ const xaiChatModels: AIChatModelCard[] = [
        },
      ],
    },
-    releasedAt: '2026-03-04',
+    releasedAt: '2026-03-09',
    settings: {
      searchImpl: 'params',
    },
@@ -105,17 +103,17 @@ const xaiChatModels: AIChatModelCard[] = [
  },
  {
    abilities: {
-      functionCall: true,
      reasoning: true,
      search: true,
      structuredOutput: true,
      vision: true,
    },
    contextWindowTokens: 2_000_000,
-    description: 'Intelligent, blazing-fast model that reasons before responding',
-    displayName: 'Grok 4.20 Experimental Beta',
+    description:
+      'A team of 4 or 16 agents, Excels at research use cases, Does not currently support client-side tools. Only supports xAI server side tools (eg X Search, Web Search tools) and remote MCP tools.',
+    displayName: 'Grok 4.20 Multi-Agent Beta',
    enabled: true,
-    id: 'grok-4.20-experimental-beta-0304-reasoning',
+    id: 'grok-4.20-multi-agent-beta-0309',
    pricing: {
      units: [
        {
@@ -147,8 +145,9 @@ const xaiChatModels: AIChatModelCard[] = [
        },
      ],
    },
-    releasedAt: '2026-03-04',
+    releasedAt: '2026-03-09',
    settings: {
+      extendParams: ['grok4_20ReasoningEffort'],
      searchImpl: 'params',
    },
    type: 'chat',
--- a/packages/model-runtime/src/providers/xai/index.test.ts
+++ b/packages/model-runtime/src/providers/xai/index.test.ts
@@ -61,6 +61,22 @@ describe('LobeXAI - custom features', () => {
      expect(createCall.stream).toBe(true);
    });

+    it('should remove unsupported penalty parameters for grok-4.20 non-reasoning variants', async () => {
+      await instance.chat({
+        apiMode: 'chatCompletion',
+        frequency_penalty: 0.4,
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'grok-4.20-beta-0309-non-reasoning',
+        presence_penalty: 0.6,
+      } as any);
+
+      const createCall = (instance['client'].chat.completions.create as Mock).mock.calls[0][0];
+
+      expect(createCall.frequency_penalty).toBeUndefined();
+      expect(createCall.presence_penalty).toBeUndefined();
+      expect(createCall.stream).toBe(true);
+    });
+
    it('should preserve penalty parameters for non-reasoning models', async () => {
      await instance.chat({
        apiMode: 'chatCompletion',
--- a/packages/model-runtime/src/providers/xai/index.ts
+++ b/packages/model-runtime/src/providers/xai/index.ts
@@ -1,4 +1,4 @@
-import { LOBE_DEFAULT_MODEL_LIST, ModelProvider } from 'model-bank';
+import { ModelProvider } from 'model-bank';

 import { createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory';
 import type { ChatStreamPayload } from '../../types';
@@ -9,19 +9,16 @@ export interface XAIModelCard {
  id: string;
 }

-const xaiReasoningModels = new Set(
-  LOBE_DEFAULT_MODEL_LIST.filter(
-    (model) =>
-      model.providerId === ModelProvider.XAI &&
-      model.type === 'chat' &&
-      !!model.abilities?.reasoning,
-  ).map((model) => model.id),
-);
-
-const isXAIReasoningModel = (model: string) => xaiReasoningModels.has(model);
+// Only these legacy non-reasoning models accept presence_penalty/frequency_penalty/stop.
+// All newer models reject these params, so strip them unless the model is listed here.
+const xaiPenaltySupportedModels = new Set([
+  'grok-3',
+  'grok-4-fast-non-reasoning',
+  'grok-4-1-fast-non-reasoning',
+]);

 const pruneUnsupportedReasoningParameters = (payload: ChatStreamPayload) => {
-  if (!isXAIReasoningModel(payload.model)) return payload;
+  if (xaiPenaltySupportedModels.has(payload.model)) return payload;

  return {
    ...payload,