diff --git a/packages/model-bank/src/aiModels/lobehub/chat/minimax.ts b/packages/model-bank/src/aiModels/lobehub/chat/minimax.ts index f1e0ea3d60..1abb4e57f4 100644 --- a/packages/model-bank/src/aiModels/lobehub/chat/minimax.ts +++ b/packages/model-bank/src/aiModels/lobehub/chat/minimax.ts @@ -1,6 +1,51 @@ import type { AIChatModelCard } from '../../../types/aiModel'; export const minimaxChatModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 204_800, + description: + 'First self-evolving model with top-tier coding and agentic performance (~60 tps).', + displayName: 'MiniMax M2.7', + enabled: true, + id: 'MiniMax-M2.7', + maxOutput: 131_072, + pricing: { + units: [ + { name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheWrite', rate: 0.375, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheRead', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 1.2, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-03-18', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 204_800, + description: 'Same performance as M2.7 with significantly faster inference (~100 tps).', + displayName: 'MiniMax M2.7 Highspeed', + enabled: true, + id: 'MiniMax-M2.7-highspeed', + maxOutput: 131_072, + pricing: { + units: [ + { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheWrite', rate: 0.375, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheRead', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 2.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-03-18', + type: 'chat', + }, { abilities: { functionCall: true, @@ -55,7 +100,6 @@ export const minimaxChatModels: AIChatModelCard[] = [ description: 'Powerful multilingual 
programming capabilities, comprehensively upgraded programming experience.', displayName: 'MiniMax M2.1', - enabled: true, id: 'MiniMax-M2.1', maxOutput: 131_072, pricing: { @@ -78,7 +122,6 @@ export const minimaxChatModels: AIChatModelCard[] = [ description: 'Powerful multilingual programming capabilities with faster and more efficient inference.', displayName: 'MiniMax M2.1 Highspeed', - enabled: true, id: 'MiniMax-M2.1-highspeed', maxOutput: 131_072, pricing: { diff --git a/packages/model-bank/src/aiModels/lobehub/chat/xai.ts b/packages/model-bank/src/aiModels/lobehub/chat/xai.ts index f172ff41ce..efb674b4ef 100644 --- a/packages/model-bank/src/aiModels/lobehub/chat/xai.ts +++ b/packages/model-bank/src/aiModels/lobehub/chat/xai.ts @@ -1,6 +1,103 @@ import type { AIChatModelCard } from '../../../types/aiModel'; export const xaiChatModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + vision: true, + }, + contextWindowTokens: 2_000_000, + description: 'Intelligent, blazing-fast model that reasons before responding', + displayName: 'Grok 4.20 Beta', + enabled: true, + id: 'grok-4.20-beta-0309-reasoning', + pricing: { + units: [ + { + name: 'textInput_cacheRead', + strategy: 'tiered', + tiers: [ + { rate: 0.2, upTo: 0.2 }, + { rate: 0.4, upTo: 'infinity' }, + ], + unit: 'millionTokens', + }, + { + name: 'textInput', + strategy: 'tiered', + tiers: [ + { rate: 2, upTo: 0.2 }, + { rate: 4, upTo: 'infinity' }, + ], + unit: 'millionTokens', + }, + { + name: 'textOutput', + strategy: 'tiered', + tiers: [ + { rate: 6, upTo: 0.2 }, + { rate: 12, upTo: 'infinity' }, + ], + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-03-09', + settings: { + searchImpl: 'params', + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + search: true, + vision: true, + }, + contextWindowTokens: 2_000_000, + description: 'A non-reasoning variant for simple use cases', + displayName: 'Grok 4.20 Beta (Non-Reasoning)', + 
enabled: true, + id: 'grok-4.20-beta-0309-non-reasoning', + pricing: { + units: [ + { + name: 'textInput_cacheRead', + strategy: 'tiered', + tiers: [ + { rate: 0.2, upTo: 0.2 }, + { rate: 0.4, upTo: 'infinity' }, + ], + unit: 'millionTokens', + }, + { + name: 'textInput', + strategy: 'tiered', + tiers: [ + { rate: 2, upTo: 0.2 }, + { rate: 4, upTo: 'infinity' }, + ], + unit: 'millionTokens', + }, + { + name: 'textOutput', + strategy: 'tiered', + tiers: [ + { rate: 6, upTo: 0.2 }, + { rate: 12, upTo: 'infinity' }, + ], + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-03-09', + settings: { + searchImpl: 'params', + }, + type: 'chat', + }, { abilities: { functionCall: true, diff --git a/packages/model-bank/src/aiModels/minimax.ts b/packages/model-bank/src/aiModels/minimax.ts index 3e8d666ff9..dab8877b9c 100644 --- a/packages/model-bank/src/aiModels/minimax.ts +++ b/packages/model-bank/src/aiModels/minimax.ts @@ -1,6 +1,52 @@ import { type AIChatModelCard, type AIImageModelCard } from '../types/aiModel'; const minimaxChatModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 204_800, + description: + 'First self-evolving model with top-tier coding and agentic performance (~60 tps).', + displayName: 'MiniMax M2.7', + enabled: true, + id: 'MiniMax-M2.7', + maxOutput: 131_072, + pricing: { + currency: 'CNY', + units: [ + { name: 'textInput_cacheRead', rate: 0.42, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheWrite', rate: 2.625, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 8.4, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-03-18', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + }, + contextWindowTokens: 204_800, + description: 'Same performance as M2.7 with significantly faster inference (~100 tps).', + 
displayName: 'MiniMax M2.7 Highspeed', + id: 'MiniMax-M2.7-highspeed', + maxOutput: 131_072, + pricing: { + currency: 'CNY', + units: [ + { name: 'textInput_cacheRead', rate: 0.42, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheWrite', rate: 2.625, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput', rate: 4.2, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 16.8, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-03-18', + type: 'chat', + }, { abilities: { functionCall: true, diff --git a/packages/model-bank/src/aiModels/xai.ts b/packages/model-bank/src/aiModels/xai.ts index ea8e4a9e9c..51146f6157 100644 --- a/packages/model-bank/src/aiModels/xai.ts +++ b/packages/model-bank/src/aiModels/xai.ts @@ -11,11 +11,10 @@ const xaiChatModels: AIChatModelCard[] = [ vision: true, }, contextWindowTokens: 2_000_000, - description: - 'A team of 4 or 16 agents, Excels at research use cases, Does not currently support client-side tools. 
Only supports xAI server side tools (eg X Search, Web Search tools) and remote MCP tools.', - displayName: 'Grok 4.20 Multi-Agent Experimental Beta', + description: 'Intelligent, blazing-fast model that reasons before responding', + displayName: 'Grok 4.20 Beta', enabled: true, - id: 'grok-4.20-multi-agent-experimental-beta-0304', + id: 'grok-4.20-beta-0309-reasoning', pricing: { units: [ { @@ -47,9 +46,8 @@ const xaiChatModels: AIChatModelCard[] = [ }, ], }, - releasedAt: '2026-03-04', + releasedAt: '2026-03-09', settings: { - extendParams: ['grok4_20ReasoningEffort'], searchImpl: 'params', }, type: 'chat', @@ -63,9 +61,9 @@ const xaiChatModels: AIChatModelCard[] = [ }, contextWindowTokens: 2_000_000, description: 'A non-reasoning variant for simple use cases', - displayName: 'Grok 4.20 Experimental Beta (Non-Reasoning)', + displayName: 'Grok 4.20 Beta (Non-Reasoning)', enabled: true, - id: 'grok-4.20-experimental-beta-0304-non-reasoning', + id: 'grok-4.20-beta-0309-non-reasoning', pricing: { units: [ { @@ -97,7 +95,7 @@ const xaiChatModels: AIChatModelCard[] = [ }, ], }, - releasedAt: '2026-03-04', + releasedAt: '2026-03-09', settings: { searchImpl: 'params', }, @@ -105,17 +103,17 @@ const xaiChatModels: AIChatModelCard[] = [ }, { abilities: { - functionCall: true, reasoning: true, search: true, structuredOutput: true, vision: true, }, contextWindowTokens: 2_000_000, - description: 'Intelligent, blazing-fast model that reasons before responding', - displayName: 'Grok 4.20 Experimental Beta', + description: + 'A team of 4 or 16 agents, Excels at research use cases, Does not currently support client-side tools. 
Only supports xAI server side tools (eg X Search, Web Search tools) and remote MCP tools.', + displayName: 'Grok 4.20 Multi-Agent Beta', enabled: true, - id: 'grok-4.20-experimental-beta-0304-reasoning', + id: 'grok-4.20-multi-agent-beta-0309', pricing: { units: [ { @@ -147,8 +145,9 @@ const xaiChatModels: AIChatModelCard[] = [ }, ], }, - releasedAt: '2026-03-04', + releasedAt: '2026-03-09', settings: { + extendParams: ['grok4_20ReasoningEffort'], searchImpl: 'params', }, type: 'chat', diff --git a/packages/model-runtime/src/providers/xai/index.test.ts b/packages/model-runtime/src/providers/xai/index.test.ts index 57b4cd82e1..a4da0d8b0d 100644 --- a/packages/model-runtime/src/providers/xai/index.test.ts +++ b/packages/model-runtime/src/providers/xai/index.test.ts @@ -61,6 +61,22 @@ describe('LobeXAI - custom features', () => { expect(createCall.stream).toBe(true); }); + it('should remove unsupported penalty parameters for grok-4.20 non-reasoning variants', async () => { + await instance.chat({ + apiMode: 'chatCompletion', + frequency_penalty: 0.4, + messages: [{ content: 'Hello', role: 'user' }], + model: 'grok-4.20-beta-0309-non-reasoning', + presence_penalty: 0.6, + } as any); + + const createCall = (instance['client'].chat.completions.create as Mock).mock.calls[0][0]; + + expect(createCall.frequency_penalty).toBeUndefined(); + expect(createCall.presence_penalty).toBeUndefined(); + expect(createCall.stream).toBe(true); + }); + it('should preserve penalty parameters for non-reasoning models', async () => { await instance.chat({ apiMode: 'chatCompletion', diff --git a/packages/model-runtime/src/providers/xai/index.ts b/packages/model-runtime/src/providers/xai/index.ts index a536368474..eeb42b1680 100644 --- a/packages/model-runtime/src/providers/xai/index.ts +++ b/packages/model-runtime/src/providers/xai/index.ts @@ -1,4 +1,4 @@ -import { LOBE_DEFAULT_MODEL_LIST, ModelProvider } from 'model-bank'; +import { ModelProvider } from 'model-bank'; import { 
createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory'; import type { ChatStreamPayload } from '../../types'; @@ -9,19 +9,16 @@ export interface XAIModelCard { id: string; } -const xaiReasoningModels = new Set( - LOBE_DEFAULT_MODEL_LIST.filter( - (model) => - model.providerId === ModelProvider.XAI && - model.type === 'chat' && - !!model.abilities?.reasoning, - ).map((model) => model.id), -); - -const isXAIReasoningModel = (model: string) => xaiReasoningModels.has(model); +// Only these legacy non-reasoning models support presence_penalty/frequency_penalty/stop. +// All newer models reject these params, so default to stripping. +const xaiPenaltySupportedModels = new Set([ + 'grok-3', + 'grok-4-fast-non-reasoning', + 'grok-4-1-fast-non-reasoning', +]); const pruneUnsupportedReasoningParameters = (payload: ChatStreamPayload) => { - if (xaiPenaltySupportedModels.has(payload.model)) return payload; return { ...payload,