diff --git a/locales/en-US/modelProvider.json b/locales/en-US/modelProvider.json index 240d4ece39..25ce45f955 100644 --- a/locales/en-US/modelProvider.json +++ b/locales/en-US/modelProvider.json @@ -215,6 +215,7 @@ "providerModels.item.modelConfig.displayName.placeholder": "Please enter the display name of the model, e.g., ChatGPT, GPT-4, etc.", "providerModels.item.modelConfig.displayName.title": "Model Display Name", "providerModels.item.modelConfig.extendParams.extra": "Choose extended parameters supported by the model. Hover an option to preview controls. Incorrect configs may cause request failures.", + "providerModels.item.modelConfig.extendParams.options.codexMaxReasoningEffort.hint": "For Codex models; controls reasoning intensity.", "providerModels.item.modelConfig.extendParams.options.disableContextCaching.hint": "For Claude models; can lower cost and speed up responses.", "providerModels.item.modelConfig.extendParams.options.effort.hint": "For Claude Opus 4.6; controls effort level (low/medium/high/max).", "providerModels.item.modelConfig.extendParams.options.enableAdaptiveThinking.hint": "For Claude Opus 4.6; toggles adaptive thinking on or off.", diff --git a/locales/zh-CN/modelProvider.json b/locales/zh-CN/modelProvider.json index 34c6a0135e..fabdbc03dd 100644 --- a/locales/zh-CN/modelProvider.json +++ b/locales/zh-CN/modelProvider.json @@ -215,6 +215,7 @@ "providerModels.item.modelConfig.displayName.placeholder": "请输入模型的展示名称,例如 ChatGPT、GPT-4 等", "providerModels.item.modelConfig.displayName.title": "模型展示名称", "providerModels.item.modelConfig.extendParams.extra": "选择模型支持的扩展参数。将鼠标悬停在选项上可预览控制项。配置错误可能导致请求失败。", + "providerModels.item.modelConfig.extendParams.options.codexMaxReasoningEffort.hint": "适用于 Codex 模型;控制推理强度。", "providerModels.item.modelConfig.extendParams.options.disableContextCaching.hint": "适用于 Claude 模型;可降低成本并加快响应速度。", "providerModels.item.modelConfig.extendParams.options.effort.hint": "适用于 Claude Opus 4.6;控制努力程度(低/中/高/最大)。", "providerModels.item.modelConfig.extendParams.options.enableAdaptiveThinking.hint": "适用于 Claude Opus 4.6;切换自适应思维的开启或关闭。", diff --git a/packages/model-bank/src/aiModels/aihubmix.ts b/packages/model-bank/src/aiModels/aihubmix.ts index a849f35dab..38b768a09a 100644 --- a/packages/model-bank/src/aiModels/aihubmix.ts +++ b/packages/model-bank/src/aiModels/aihubmix.ts @@ -1,6 +1,139 @@ import { type AIChatModelCard } from '../types/aiModel'; const aihubmixModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + structuredOutput: true, + vision: true, + }, + contextWindowTokens: 1_050_000, + description: + 'GPT-5.4 is the frontier model for complex professional work with highest reasoning capability.', + displayName: 'GPT-5.4', + enabled: true, + id: 'gpt-5.4', + maxOutput: 128_000, + pricing: { + units: [ + { + lookup: { + prices: { + '[0, 0.272]': 2.5, + '[0.272, infinity]': 5, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.272]': 0.25, + '[0.272, infinity]': 0.5, + }, + pricingParams: ['textInput'], + }, + name: 'textInput_cacheRead', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.272]': 15, + '[0.272, infinity]': 22.5, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-03-05', + settings: { + extendParams: ['gpt5_2ReasoningEffort', 'textVerbosity'], + searchImpl: 'params', + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + vision: true, + }, + contextWindowTokens: 1_050_000, + description: + 'GPT-5.4 pro uses more compute to think harder and provide consistently better answers, available in the Responses API only.', + displayName: 'GPT-5.4 pro', + id: 'gpt-5.4-pro', + maxOutput: 128_000, + pricing: { + units: [ + { + lookup: { + prices: { + '[0, 0.272]': 30, + '[0.272, infinity]': 60, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.272]': 180, + '[0.272, infinity]': 270, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-03-05', + settings: { + extendParams: ['gpt5_2ProReasoningEffort'], + searchImpl: 'params', + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + vision: true, + structuredOutput: true, + }, + contextWindowTokens: 128_000, + description: + 'GPT-5.3 Chat is the latest ChatGPT model used in ChatGPT with improved conversation experiences.', + displayName: 'GPT-5.3 Chat', + enabled: true, + id: 'gpt-5.3-chat-latest', + maxOutput: 16_384, + pricing: { + units: [ + { name: 'textInput', rate: 1.75, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheRead', rate: 0.175, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 14, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-03-04', + type: 'chat', + }, { abilities: { functionCall: true, @@ -13,7 +146,6 @@ const aihubmixModels: AIChatModelCard[] = [ description: 'GPT-5.2 is a flagship model for coding and agentic workflows with stronger reasoning and long-context performance.', displayName: 'GPT-5.2', - enabled: true, id: 'gpt-5.2', maxOutput: 128_000, pricing: { @@ -65,7 +197,6 @@ const aihubmixModels: AIChatModelCard[] = [ description: 'GPT-5.2 Chat is the ChatGPT variant (chat-latest) for the latest conversation improvements.', displayName: 'GPT-5.2 Chat', - enabled: true, id: 'gpt-5.2-chat-latest', maxOutput: 16_384, pricing: { diff --git a/packages/model-bank/src/aiModels/google.ts b/packages/model-bank/src/aiModels/google.ts index aceb8f661b..973a9e4f7c 100644 --- a/packages/model-bank/src/aiModels/google.ts +++ b/packages/model-bank/src/aiModels/google.ts @@ -235,65 +235,6 @@ const googleChatModels: AIChatModelCard[] = [ }, type: 'chat', }, - { - abilities: { - functionCall: true, - reasoning: true, - search: true, - video: true, - vision: true, - }, - contextWindowTokens: 1_048_576 + 65_536, - description: - 'Gemini 3 Pro is Google’s most powerful agent and vibe-coding model, delivering richer visuals and deeper interaction on top of state-of-the-art reasoning.', - displayName: 'Gemini 3 Pro Preview', - id: 'gemini-3-pro-preview', - maxOutput: 65_536, - pricing: { - units: [ - { - name: 'textInput_cacheRead', - strategy: 'tiered', - tiers: [ - { rate: 0.2, upTo: 200_000 }, - { rate: 0.4, upTo: 'infinity' }, - ], - unit: 'millionTokens', - }, - { - name: 'textInput', - strategy: 'tiered', - tiers: [ - { rate: 2, upTo: 200_000 }, - { rate: 4, upTo: 'infinity' }, - ], - unit: 'millionTokens', - }, - { - name: 'textOutput', - strategy: 'tiered', - tiers: [ - { rate: 12, upTo: 200_000 }, - { rate: 18, upTo: 'infinity' }, - ], - unit: 'millionTokens', - }, - { - lookup: { prices: { '1h': 4.5 }, pricingParams: ['ttl'] }, - name: 'textInput_cacheWrite', - strategy: 'lookup', - unit: 'millionTokens', - }, - ], - }, - releasedAt: '2025-11-18', - settings: { - extendParams: ['thinkingLevel2', 'urlContext'], - searchImpl: 'params', - searchProvider: 'google', - }, - type: 'chat', - }, { abilities: { functionCall: true, diff --git a/packages/model-bank/src/aiModels/openai.ts b/packages/model-bank/src/aiModels/openai.ts index f6f031b584..0c3ffaf708 100644 --- a/packages/model-bank/src/aiModels/openai.ts +++ b/packages/model-bank/src/aiModels/openai.ts @@ -18,6 +18,167 @@ export const gptImage1ParamsSchema: ModelParamsSchema = { }; export const openaiChatModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + structuredOutput: true, + vision: true, + }, + contextWindowTokens: 1_050_000, + description: + 'GPT-5.4 is the frontier model for complex professional work with highest reasoning capability.', + displayName: 'GPT-5.4', + enabled: true, + id: 'gpt-5.4', + maxOutput: 128_000, + pricing: { + units: [ + { + lookup: { + prices: { + '[0, 0.272]': 2.5, + '[0.272, infinity]': 5, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.272]': 0.25, + '[0.272, infinity]': 0.5, + }, + pricingParams: ['textInput'], + }, + name: 'textInput_cacheRead', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.272]': 15, + '[0.272, infinity]': 22.5, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-03-05', + settings: { + extendParams: ['gpt5_2ReasoningEffort', 'textVerbosity'], + searchImpl: 'params', + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + vision: true, + }, + contextWindowTokens: 1_050_000, + description: + 'GPT-5.4 pro uses more compute to think harder and provide consistently better answers, available in the Responses API only.', + displayName: 'GPT-5.4 pro', + id: 'gpt-5.4-pro', + maxOutput: 128_000, + pricing: { + units: [ + { + lookup: { + prices: { + '[0, 0.272]': 30, + '[0.272, infinity]': 60, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.272]': 180, + '[0.272, infinity]': 270, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-03-05', + settings: { + extendParams: ['gpt5_2ProReasoningEffort'], + searchImpl: 'params', + }, + type: 'chat', + }, + { + abilities: { + functionCall: true, + vision: true, + structuredOutput: true, + }, + contextWindowTokens: 128_000, + description: + 'GPT-5.3 Chat is the latest ChatGPT model used in ChatGPT with improved conversation experiences.', + displayName: 'GPT-5.3 Chat', + enabled: true, + id: 'gpt-5.3-chat-latest', + maxOutput: 16_384, + pricing: { + units: [ + { name: 'textInput', rate: 1.75, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheRead', rate: 0.175, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 14, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-03-04', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + structuredOutput: true, + vision: true, + }, + contextWindowTokens: 400_000, + description: + 'GPT-5.3-Codex is the most capable agentic coding model to date, optimized for agentic coding tasks in Codex or similar environments.', + displayName: 'GPT-5.3 Codex', + id: 'gpt-5.3-codex', + maxOutput: 128_000, + pricing: { + units: [ + { name: 'textInput', rate: 1.75, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheRead', rate: 0.175, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 14, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-02-05', + settings: { + extendParams: ['codexMaxReasoningEffort'], + searchImpl: 'params', + }, + type: 'chat', + }, { abilities: { functionCall: true, @@ -30,7 +191,6 @@ export const openaiChatModels: AIChatModelCard[] = [ description: 'GPT-5.2 is a flagship model for coding and agentic workflows with stronger reasoning and long-context performance.', displayName: 'GPT-5.2', - enabled: true, id: 'gpt-5.2', maxOutput: 128_000, pricing: { @@ -47,6 +207,34 @@ export const openaiChatModels: AIChatModelCard[] = [ }, type: 'chat', }, + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + structuredOutput: true, + vision: true, + }, + contextWindowTokens: 400_000, + description: + 'GPT-5.2-Codex is an upgraded GPT-5.2 variant optimized for long-horizon, agentic coding tasks.', + displayName: 'GPT-5.2 Codex', + id: 'gpt-5.2-codex', + maxOutput: 128_000, + pricing: { + units: [ + { name: 'textInput', rate: 1.75, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheRead', rate: 0.175, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 14, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2025-12-18', + settings: { + extendParams: ['codexMaxReasoningEffort'], + searchImpl: 'params', + }, + type: 'chat', + }, { abilities: { functionCall: true, @@ -82,7 +270,6 @@ export const openaiChatModels: AIChatModelCard[] = [ description: 'GPT-5.2 Chat is the ChatGPT variant (chat-latest) for the latest conversation improvements.', displayName: 'GPT-5.2 Chat', - enabled: true, id: 'gpt-5.2-chat-latest', maxOutput: 16_384, pricing: { @@ -142,6 +329,33 @@ export const openaiChatModels: AIChatModelCard[] = [ releasedAt: '2025-11-13', type: 'chat', }, + { + abilities: { + functionCall: true, + reasoning: true, + search: true, + vision: true, + }, + contextWindowTokens: 400_000, + description: + "GPT-5.1 Codex Max: OpenAI's most intelligent coding model, optimized for long-horizon agentic coding tasks, supports reasoning tokens.", + displayName: 'GPT-5.1 Codex Max', + id: 'gpt-5.1-codex-max', + maxOutput: 128_000, + pricing: { + units: [ + { name: 'textInput', rate: 1.25, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textInput_cacheRead', rate: 0.125, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2025-12-04', + settings: { + extendParams: ['codexMaxReasoningEffort'], + searchImpl: 'params', + }, + type: 'chat', + }, { abilities: { functionCall: true, diff --git a/packages/model-bank/src/aiModels/qwen.ts b/packages/model-bank/src/aiModels/qwen.ts index e5e3faffba..41d756b9f6 100644 --- a/packages/model-bank/src/aiModels/qwen.ts +++ b/packages/model-bank/src/aiModels/qwen.ts @@ -1352,7 +1352,7 @@ const qwenChatModels: AIChatModelCard[] = [ vision: true, }, config: { - deploymentName: 'qwen3.5-flash-2026-02-23', + deploymentName: 'qwen3.5-flash', // Supports context caching }, contextWindowTokens: 1_000_000, description: 'Fastest and lowest-cost Qwen model, ideal for simple tasks.', diff --git a/packages/model-bank/src/aiModels/siliconcloud.ts b/packages/model-bank/src/aiModels/siliconcloud.ts index a74f181180..ee2966f168 100644 --- a/packages/model-bank/src/aiModels/siliconcloud.ts +++ b/packages/model-bank/src/aiModels/siliconcloud.ts @@ -2,6 +2,248 @@ import { type AIChatModelCard, type AIImageModelCard } from '../types/aiModel'; // https://siliconflow.cn/zh-cn/models const siliconcloudChatModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + reasoning: true, + vision: true, + }, + contextWindowTokens: 262_144, + description: + 'Qwen3.5-397B-A17B is the latest vision-language model in the Qwen3.5 series, using a Mixture-of-Experts (MoE) architecture with 397B total parameters and 17B active parameters. It natively supports 256K context length with extensibility to approximately 1M tokens, supports 201 languages, and provides unified vision-language understanding, tool calling, and reasoning capabilities.', + displayName: 'Qwen3.5 397B A17B', + id: 'Qwen/Qwen3.5-397B-A17B', + organization: 'Qwen', + pricing: { + currency: 'CNY', + units: [ + { + lookup: { + prices: { + '[0, 0.128]': 0.8, + '[0.128, infinity]': 2, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.128]': 4.8, + '[0.128, infinity]': 12, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-02-16', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + vision: true, + }, + contextWindowTokens: 262_144, + description: + 'Qwen3.5-122B-A10B is a native multimodal large language model from the Qwen team with 122B total parameters and only 10B active parameters. It adopts an efficient hybrid architecture combining Gated Delta Networks and Sparse Mixture-of-Experts (MoE), natively supporting 256K context length with extensibility to approximately 1M tokens.', + displayName: 'Qwen3.5 122B A10B', + id: 'Qwen/Qwen3.5-122B-A10B', + organization: 'Qwen', + pricing: { + currency: 'CNY', + units: [ + { + lookup: { + prices: { + '[0, 0.128]': 0.8, + '[0.128, infinity]': 2, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.128]': 6.4, + '[0.128, infinity]': 16, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-02-26', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + vision: true, + }, + contextWindowTokens: 262_144, + description: + 'Qwen3.5-35B-A3B is a native multimodal large language model from the Qwen team with 35B total parameters and only 3B active parameters. It adopts an efficient hybrid architecture combining Gated Delta Networks and Sparse Mixture-of-Experts (MoE), natively supporting 256K context length with extensibility to approximately 1M tokens.', + displayName: 'Qwen3.5 35B A3B', + id: 'Qwen/Qwen3.5-35B-A3B', + organization: 'Qwen', + pricing: { + currency: 'CNY', + units: [ + { + lookup: { + prices: { + '[0, 0.128]': 0.4, + '[0.128, infinity]': 1.6, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.128]': 3.2, + '[0.128, infinity]': 12.8, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-02-25', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + vision: true, + }, + contextWindowTokens: 262_144, + description: + 'Qwen3.5-27B is a native multimodal large language model from the Qwen team with 27B parameters. It adopts an efficient hybrid architecture combining Gated Delta Networks and Gated Attention, natively supporting 256K context length with extensibility to approximately 1M tokens.', + displayName: 'Qwen3.5 27B', + id: 'Qwen/Qwen3.5-27B', + organization: 'Qwen', + pricing: { + currency: 'CNY', + units: [ + { + lookup: { + prices: { + '[0, 0.128]': 0.6, + '[0.128, infinity]': 1.8, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.128]': 4.8, + '[0.128, infinity]': 14.4, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-02-25', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + vision: true, + }, + contextWindowTokens: 262_144, + description: + 'Qwen3.5-9B is a native multimodal large language model from the Qwen team with 9B parameters. As a lightweight Dense model in the Qwen3.5 series, it adopts an efficient hybrid architecture combining Gated Delta Networks and Gated Attention, natively supporting 256K context length with extensibility to approximately 1M tokens.', + displayName: 'Qwen3.5 9B', + id: 'Qwen/Qwen3.5-9B', + organization: 'Qwen', + pricing: { + currency: 'CNY', + units: [ + { + lookup: { + prices: { + '[0, 0.128]': 0.5, + '[0.128, infinity]': 1.5, + }, + pricingParams: ['textInput'], + }, + name: 'textInput', + strategy: 'lookup', + unit: 'millionTokens', + }, + { + lookup: { + prices: { + '[0, 0.128]': 4, + '[0.128, infinity]': 12, + }, + pricingParams: ['textInput'], + }, + name: 'textOutput', + strategy: 'lookup', + unit: 'millionTokens', + }, + ], + }, + releasedAt: '2026-03-03', + type: 'chat', + }, + { + abilities: { + functionCall: true, + reasoning: true, + vision: true, + }, + contextWindowTokens: 262_144, + description: + 'Qwen3.5-4B is a native multimodal large language model from the Qwen team with 4B parameters, the most lightweight Dense model in the Qwen3.5 series. It adopts an efficient hybrid architecture combining Gated Delta Networks and Gated Attention, natively supporting 256K context length with extensibility to approximately 1M tokens.', + displayName: 'Qwen3.5 4B', + id: 'Qwen/Qwen3.5-4B', + organization: 'Qwen', + pricing: { + currency: 'CNY', + units: [ + { name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' }, + { name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' }, + ], + }, + releasedAt: '2026-03-03', + type: 'chat', + }, { abilities: { functionCall: true, diff --git a/packages/model-bank/src/aiModels/vertexai.ts b/packages/model-bank/src/aiModels/vertexai.ts index 2be075fe67..ebc734b69b 100644 --- a/packages/model-bank/src/aiModels/vertexai.ts +++ b/packages/model-bank/src/aiModels/vertexai.ts @@ -137,7 +137,7 @@ const vertexaiChatModels: AIChatModelCard[] = [ description: 'Gemini 3 Pro is Google’s most powerful agent and vibe-coding model, delivering richer visuals and deeper interaction on top of state-of-the-art reasoning.', displayName: 'Gemini 3 Pro Preview', - id: 'gemini-3-pro-preview', + id: 'gemini-3-pro-preview', // deprecated on 2026-03-26 maxOutput: 65_536, pricing: { units: [ diff --git a/packages/model-bank/src/aiModels/zhipu.ts b/packages/model-bank/src/aiModels/zhipu.ts index a96838ddb7..128ec9cfb8 100644 --- a/packages/model-bank/src/aiModels/zhipu.ts +++ b/packages/model-bank/src/aiModels/zhipu.ts @@ -26,7 +26,7 @@ const zhipuChatModels: AIChatModelCard[] = [ '[0, 0.032]': 1, '[0.032, infinity]': 1.5, }, - pricingParams: ['textInput', 'textOutput'], + pricingParams: ['textInput'], }, name: 'textInput_cacheRead', strategy: 'lookup', @@ -38,7 +38,7 @@ const zhipuChatModels: AIChatModelCard[] = [ '[0, 0.032]': 4, '[0.032, infinity]': 6, }, - pricingParams: ['textInput', 'textOutput'], + pricingParams: ['textInput'], }, name: 'textInput', strategy: 'lookup', @@ -50,7 +50,7 @@ const zhipuChatModels: AIChatModelCard[] = [ '[0, 0.032]': 18, '[0.032, infinity]': 22, }, - pricingParams: ['textInput', 'textOutput'], + pricingParams: ['textInput'], }, name: 'textOutput', strategy: 'lookup', diff --git a/packages/model-bank/src/types/aiModel.ts b/packages/model-bank/src/types/aiModel.ts index 5d854cc195..418b79033b 100644 --- a/packages/model-bank/src/types/aiModel.ts +++ b/packages/model-bank/src/types/aiModel.ts @@ -251,6 +251,7 @@ export type ExtendParamsType = | 'gpt5_2ReasoningEffort' | 'gpt5_2ProReasoningEffort' | 'grok4_20ReasoningEffort' + | 'codexMaxReasoningEffort' | 'textVerbosity' | 'thinking' | 'thinkingBudget' @@ -286,6 +287,7 @@ export const ExtendParamsTypeSchema = z.enum([ 'gpt5_2ReasoningEffort', 'gpt5_2ProReasoningEffort', 'grok4_20ReasoningEffort', + 'codexMaxReasoningEffort', 'textVerbosity', 'thinking', 'thinkingBudget', diff --git a/packages/model-runtime/src/const/models.ts b/packages/model-runtime/src/const/models.ts index fa8d4d7cd2..e8608e326a 100644 --- a/packages/model-runtime/src/const/models.ts +++ b/packages/model-runtime/src/const/models.ts @@ -40,9 +40,12 @@ export const responsesAPIModels = new Set([ 'gpt-5-pro-2025-10-06', 'gpt-5.1-codex', 'gpt-5.1-codex-mini', + 'gpt-5.1-codex-max', 'gpt-5.2', + 'gpt-5.2-codex', 'gpt-5.2-pro-2025-12-11', 'gpt-5.2-pro', + 'gpt-5.3-codex', 'gpt-5.4', 'gpt-5.4-pro', ]); diff --git a/packages/types/src/agent/chatConfig.ts b/packages/types/src/agent/chatConfig.ts index 9769f82a1b..8594539fa3 100644 --- a/packages/types/src/agent/chatConfig.ts +++ b/packages/types/src/agent/chatConfig.ts @@ -19,10 +19,12 @@ export interface AgentMemoryChatConfig { export interface LobeAgentChatConfig extends AgentMemoryChatConfig { autoCreateTopicThreshold: number; + codexMaxReasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'; /** * Model ID to use for generating compression summaries */ compressionModelId?: string; + /** * Disable context caching */ @@ -34,7 +36,6 @@ export interface LobeAgentChatConfig extends AgentMemoryChatConfig { * Whether to enable adaptive thinking (Claude Opus 4.6) */ enableAdaptiveThinking?: boolean; - enableAutoCreateTopic?: boolean; /** * Whether to auto-scroll during AI streaming output @@ -148,6 +149,7 @@ export const MemoryChatConfigSchema = z.object({ export const AgentChatConfigSchema = z .object({ autoCreateTopicThreshold: z.number().default(2), + codexMaxReasoningEffort: z.enum(['low', 'medium', 'high', 'xhigh']).optional(), compressionModelId: z.string().optional(), disableContextCaching: z.boolean().optional(), effort: z.enum(['low', 'medium', 'high', 'max']).optional(), diff --git a/src/features/ModelSwitchPanel/components/ControlsForm/CodexMaxReasoningEffortSlider.tsx b/src/features/ModelSwitchPanel/components/ControlsForm/CodexMaxReasoningEffortSlider.tsx new file mode 100644 index 0000000000..c83215312c --- /dev/null +++ b/src/features/ModelSwitchPanel/components/ControlsForm/CodexMaxReasoningEffortSlider.tsx @@ -0,0 +1,16 @@ +import { type CreatedLevelSliderProps } from './createLevelSlider'; +import { createLevelSliderComponent } from './createLevelSlider'; + +const CODEX_MAX_REASONING_EFFORT_LEVELS = ['low', 'medium', 'high', 'xhigh'] as const; +type CodexMaxReasoningEffort = (typeof CODEX_MAX_REASONING_EFFORT_LEVELS)[number]; + +export type CodexMaxReasoningEffortSliderProps = CreatedLevelSliderProps; + +const CodexMaxReasoningEffortSlider = createLevelSliderComponent({ + configKey: 'codexMaxReasoningEffort', + defaultValue: 'medium', + levels: CODEX_MAX_REASONING_EFFORT_LEVELS, + style: { minWidth: 200 }, +}); + +export default CodexMaxReasoningEffortSlider; diff --git a/src/features/ModelSwitchPanel/components/ControlsForm/ControlsForm.tsx b/src/features/ModelSwitchPanel/components/ControlsForm/ControlsForm.tsx index b065fa8f1a..d94bc84d7f 100644 --- a/src/features/ModelSwitchPanel/components/ControlsForm/ControlsForm.tsx +++ b/src/features/ModelSwitchPanel/components/ControlsForm/ControlsForm.tsx @@ -11,6 +11,7 @@ import { useAgentStore } from '@/store/agent'; import { agentByIdSelectors, chatConfigByIdSelectors } from '@/store/agent/selectors'; import { aiModelSelectors, useAiInfraStore } from '@/store/aiInfra'; +import CodexMaxReasoningEffortSlider from './CodexMaxReasoningEffortSlider'; import ContextCachingSwitch from './ContextCachingSwitch'; import EffortSlider from './EffortSlider'; import GPT5ReasoningEffortSlider from './GPT5ReasoningEffortSlider'; @@ -218,6 +219,17 @@ const ControlsForm = memo(({ model: modelProp, provider: prov paddingBottom: 0, }, }, + { + children: , + desc: 'reasoning_effort', + label: t('extendParams.reasoningEffort.title'), + layout: 'horizontal', + minWidth: undefined, + name: 'codexMaxReasoningEffort', + style: { + paddingBottom: 0, + }, + }, { children: , desc: 'text_verbosity', diff --git a/src/locales/default/modelProvider.ts b/src/locales/default/modelProvider.ts index 025b9e36d6..d256bfa353 100644 --- a/src/locales/default/modelProvider.ts +++ b/src/locales/default/modelProvider.ts @@ -245,6 +245,8 @@ export default { 'providerModels.item.modelConfig.displayName.title': 'Model Display Name', 'providerModels.item.modelConfig.extendParams.extra': 'Choose extended parameters supported by the model. Hover an option to preview controls. Incorrect configs may cause request failures.', + 'providerModels.item.modelConfig.extendParams.options.codexMaxReasoningEffort.hint': + 'For Codex models; controls reasoning intensity.', 'providerModels.item.modelConfig.extendParams.options.disableContextCaching.hint': 'For Claude models; can lower cost and speed up responses.', 'providerModels.item.modelConfig.extendParams.options.effort.hint': diff --git a/src/routes/(main)/settings/provider/features/ModelList/CreateNewModelModal/ExtendParamsSelect.tsx b/src/routes/(main)/settings/provider/features/ModelList/CreateNewModelModal/ExtendParamsSelect.tsx index 909926d0fd..7dc7e8d58f 100644 --- a/src/routes/(main)/settings/provider/features/ModelList/CreateNewModelModal/ExtendParamsSelect.tsx +++ b/src/routes/(main)/settings/provider/features/ModelList/CreateNewModelModal/ExtendParamsSelect.tsx @@ -5,6 +5,7 @@ import { type ReactNode } from 'react'; import { memo, useMemo } from 'react'; import { Trans, useTranslation } from 'react-i18next'; +import CodexMaxReasoningEffortSlider from '@/features/ModelSwitchPanel/components/ControlsForm/CodexMaxReasoningEffortSlider'; import EffortSlider from '@/features/ModelSwitchPanel/components/ControlsForm/EffortSlider'; import GPT5ReasoningEffortSlider from '@/features/ModelSwitchPanel/components/ControlsForm/GPT5ReasoningEffortSlider'; import GPT51ReasoningEffortSlider from '@/features/ModelSwitchPanel/components/ControlsForm/GPT51ReasoningEffortSlider'; @@ -76,6 +77,10 @@ const EXTEND_PARAMS_OPTIONS: ExtendParamsOption[] = [ hintKey: 'providerModels.item.modelConfig.extendParams.options.grok4_20ReasoningEffort.hint', key: 'grok4_20ReasoningEffort', }, + { + hintKey: 'providerModels.item.modelConfig.extendParams.options.codexMaxReasoningEffort.hint', + key: 'codexMaxReasoningEffort', + }, { hintKey: 'providerModels.item.modelConfig.extendParams.options.textVerbosity.hint', key: 'textVerbosity', @@ -133,6 +138,7 @@ const EXTEND_PARAMS_OPTIONS: ExtendParamsOption[] = [ // Map variant keys to their base i18n title key (synced with ControlsForm.tsx) // This allows reusing existing i18n translations instead of adding new ones const TITLE_KEY_ALIASES: Partial> = { + codexMaxReasoningEffort: 'reasoningEffort', gpt5ReasoningEffort: 'reasoningEffort', gpt5_1ReasoningEffort: 'reasoningEffort', gpt5_2ProReasoningEffort: 'reasoningEffort', @@ -153,6 +159,11 @@ type PreviewMeta = { }; const PREVIEW_META: Partial> = { + codexMaxReasoningEffort: { + labelSuffix: ' (Codex)', + previewWidth: 300, + tag: 'reasoning_effort', + }, disableContextCaching: { labelSuffix: ' (Claude)', previewWidth: 400 }, effort: { labelSuffix: ' (Opus 4.6)', previewWidth: 280, tag: 'output_config.effort' }, enableAdaptiveThinking: { @@ -271,6 +282,7 @@ const ExtendParamsSelect = memo(({ value, onChange }) = // Preview controls use controlled mode with default values (no store access) const previewControls = useMemo>>( () => ({ + codexMaxReasoningEffort: , disableContextCaching: , effort: , enableAdaptiveThinking: , diff --git a/src/services/chat/mecha/modelParamsResolver.ts b/src/services/chat/mecha/modelParamsResolver.ts index cf3b229479..ecfa045ddb 100644 --- a/src/services/chat/mecha/modelParamsResolver.ts +++ b/src/services/chat/mecha/modelParamsResolver.ts @@ -126,6 +126,10 @@ export const resolveModelExtendParams = (ctx: ModelParamsContext): ModelExtendPa extendParams.reasoning_effort = chatConfig.grok4_20ReasoningEffort; } + if (modelExtendParams.includes('codexMaxReasoningEffort') && chatConfig.codexMaxReasoningEffort) { + extendParams.reasoning_effort = chatConfig.codexMaxReasoningEffort; + } + if (modelExtendParams.includes('effort') && chatConfig.effort) { extendParams.effort = chatConfig.effort; }