feat(siliconcloud): add Qwen3.5 series models (#12785)

This commit is contained in:
Hardy
2026-03-10 09:58:37 +08:00
committed by GitHub
parent e753856abf
commit 7de2a68d20
2 changed files with 266 additions and 0 deletions

View File

@@ -1083,6 +1083,12 @@
"qwen/qwen3-max-preview.description": "Qwen3 Max预览版是面向高级推理与工具集成的 Max 版本。",
"qwen/qwen3-max.description": "Qwen3 Max 是 Qwen3 系列的高端推理模型,专注于多语言推理与工具集成。",
"qwen/qwen3-vl-plus.description": "Qwen3 VL-Plus 是 Qwen3 的视觉增强版本,具备更强的多模态推理与视频处理能力。",
"qwen/qwen3.5-397b-a17b.description": "Qwen3.5-397B-A17B 是通义千问系列的最新视觉语言模型,采用混合专家架构(MoE),总参数量 397B,激活参数 17B。模型原生支持 256K 上下文长度,可扩展至约 1M tokens。支持 201 种语言,具备统一的视觉语言理解能力、工具调用和推理思考模式。",
"qwen/qwen3.5-122b-a10b.description": "Qwen3.5-122B-A10B 是通义千问团队推出的原生多模态大语言模型,总参数量 122B,激活参数量仅 10B。该模型采用门控 Delta 网络与稀疏混合专家(MoE)相结合的高效混合架构,原生支持 256K 上下文长度,并可扩展至约 100 万 tokens。模型通过早期融合训练实现了统一的视觉语言基础能力,支持文本、图像和视频理解,在知识、推理、编程、智能体、视觉理解和多语言等多项基准测试中表现优异,并在多项指标上超越 GPT-5-mini 和 Qwen3-235B-A22B。模型默认启用思考模式(Thinking Mode),支持工具调用,并覆盖 201 种语言和方言。",
"qwen/qwen3.5-35b-a3b.description": "Qwen3.5-35B-A3B 是通义千问团队推出的原生多模态大语言模型,总参数量 35B,激活参数量仅 3B。该模型采用门控 Delta 网络与稀疏混合专家(MoE)相结合的高效混合架构,原生支持 256K 上下文长度,并可扩展至约 100 万 tokens。模型通过早期融合训练实现了统一的视觉语言基础能力,支持文本、图像和视频理解,在推理、编程、智能体和视觉理解等多项基准测试中表现优异。模型默认启用思考模式(Thinking Mode),支持工具调用,并覆盖 201 种语言和方言。",
"qwen/qwen3.5-27b.description": "Qwen3.5-27B 是通义千问团队推出的原生多模态大语言模型,拥有 27B 参数。该模型采用门控 Delta 网络与门控注意力相结合的高效混合架构,原生支持 256K 上下文长度,并可扩展至约 100 万 tokens。模型通过早期融合训练实现了统一的视觉语言基础能力,支持文本、图像和视频理解,在推理、编程、智能体和视觉理解等多项基准测试中表现优异,多项指标超越 Qwen3-235B-A22B 和 GPT-5-mini。模型默认启用思考模式(Thinking Mode),支持工具调用,并覆盖 201 种语言和方言。",
"qwen/qwen3.5-9b.description": "Qwen3.5-9B 是通义千问团队推出的原生多模态大语言模型,拥有 9B 参数。作为 Qwen3.5 系列的轻量级 Dense 模型,它采用门控 Delta 网络与门控注意力相结合的高效混合架构,原生支持 256K 上下文长度,并可扩展至约 100 万 tokens。模型通过早期融合训练实现了统一的视觉语言基础能力,支持文本、图像和视频理解。模型默认启用思考模式(Thinking Mode),支持工具调用,并覆盖 201 种语言和方言。",
"qwen/qwen3.5-4b.description": "Qwen3.5-4B 是通义千问团队推出的原生多模态大语言模型,拥有 4B 参数,是 Qwen3.5 系列中最轻量的 Dense 模型。该模型采用门控 Delta 网络与门控注意力相结合的高效混合架构,原生支持 256K 上下文长度,并可扩展至约 100 万 tokens。模型通过早期融合训练实现了统一的视觉语言基础能力,支持文本、图像和视频理解,在同规模模型中表现优异,多项指标超越 GPT-5-Nano 和 Gemini-2.5-Flash-Lite。模型默认启用思考模式(Thinking Mode),支持工具调用,并覆盖 201 种语言和方言。",
"qwen2.5-14b-instruct-1m.description": "Qwen2.5 开源 72B 模型。",
"qwen2.5-14b-instruct.description": "Qwen2.5 开源 14B 模型。",
"qwen2.5-32b-instruct.description": "Qwen2.5 开源 32B 模型。",

View File

@@ -198,6 +198,266 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
  // Qwen3.5 397B A17B — MoE vision-language model; tiered CNY pricing (see tier note below).
  abilities: {
    functionCall: true,
    reasoning: true,
    // 'video' reordered before 'vision' to keep keys alphabetically sorted,
    // matching every other object in these entries (top-level keys, pricing keys, etc.).
    video: true,
    vision: true,
  },
  contextWindowTokens: 262_144,
  description:
    'Qwen3.5-397B-A17B is the latest vision-language model in the Qwen series, featuring a Mixture of Experts (MoE) architecture with 397B total parameters and 17B active parameters. The model natively supports a 256K context length, extendable to approximately 1M tokens. It supports 201 languages and offers unified vision-language understanding capabilities, tool calling, and reasoning thinking modes.',
  displayName: 'Qwen3.5 397B A17B',
  id: 'qwen/qwen3.5-397b-a17b',
  pricing: {
    currency: 'CNY',
    units: [
      {
        lookup: {
          prices: {
            // tier boundary: 0.131072 million tokens = 131,072 tokens = 128K input
            '[0, 0.131072]': 0.8,
            '[0.131072, infinity]': 2.0,
          },
          pricingParams: ['textInput'],
        },
        name: 'textInput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
      {
        lookup: {
          prices: {
            '[0, 0.131072]': 4.8,
            '[0.131072, infinity]': 12.0,
          },
          // output price tier is selected by input token count ('textInput'),
          // same lookup key as the input unit above
          pricingParams: ['textInput'],
        },
        name: 'textOutput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
    ],
  },
  releasedAt: '2026-03-03',
  settings: {
    extendParams: ['enableReasoning', 'reasoningBudgetToken'],
  },
  type: 'chat',
},
{
  // Qwen3.5 122B A10B — sparse-MoE multimodal model; tiered CNY pricing at the 128K input boundary.
  abilities: {
    functionCall: true,
    reasoning: true,
    // alphabetical key order ('video' before 'vision'), consistent with the rest of the file
    video: true,
    vision: true,
  },
  contextWindowTokens: 262_144,
  description:
    'Qwen3.5-122B-A10B is a native multimodal large language model developed by the Qwen team, with 122B total parameters and only 10B active parameters. The model employs a highly efficient hybrid architecture combining Gated Delta Networks with Sparse Mixture of Experts (MoE). It natively supports 256K context length, extendable to approximately 1 million tokens. Through early fusion training, the model achieves unified vision-language foundational capabilities, supporting text, image, and video understanding. It delivers excellent performance across multiple benchmarks including knowledge, reasoning, coding, agents, visual understanding, and multilingual tasks, surpassing GPT-5-mini and Qwen3-235B-A22B on several metrics. The model has Thinking Mode enabled by default, supports tool calling, and covers 201 languages and dialects.',
  displayName: 'Qwen3.5 122B A10B',
  id: 'qwen/qwen3.5-122b-a10b',
  pricing: {
    currency: 'CNY',
    units: [
      {
        lookup: {
          prices: {
            // 0.131072 million tokens = 131,072 tokens = 128K input threshold
            '[0, 0.131072]': 0.8,
            '[0.131072, infinity]': 2.0,
          },
          pricingParams: ['textInput'],
        },
        name: 'textInput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
      {
        lookup: {
          prices: {
            '[0, 0.131072]': 6.4,
            '[0.131072, infinity]': 16.0,
          },
          // output tier keyed by input size ('textInput')
          pricingParams: ['textInput'],
        },
        name: 'textOutput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
    ],
  },
  releasedAt: '2026-03-03',
  settings: {
    extendParams: ['enableReasoning', 'reasoningBudgetToken'],
  },
  type: 'chat',
},
{
  // Qwen3.5 35B A3B — sparse-MoE multimodal model; tiered CNY pricing at the 128K input boundary.
  abilities: {
    functionCall: true,
    reasoning: true,
    // alphabetical key order ('video' before 'vision'), consistent with the rest of the file
    video: true,
    vision: true,
  },
  contextWindowTokens: 262_144,
  description:
    'Qwen3.5-35B-A3B is a native multimodal large language model developed by the Qwen team, with 35B total parameters and only 3B active parameters. The model employs a highly efficient hybrid architecture combining Gated Delta Networks with Sparse Mixture of Experts (MoE). It natively supports 256K context length, extendable to approximately 1 million tokens. Through early fusion training, the model achieves unified vision-language foundational capabilities, supporting text, image, and video understanding. It delivers excellent performance across multiple benchmarks including reasoning, coding, agents, and visual understanding. The model has Thinking Mode enabled by default, supports tool calling, and covers 201 languages and dialects.',
  displayName: 'Qwen3.5 35B A3B',
  id: 'qwen/qwen3.5-35b-a3b',
  pricing: {
    currency: 'CNY',
    units: [
      {
        lookup: {
          prices: {
            // 0.131072 million tokens = 131,072 tokens = 128K input threshold
            '[0, 0.131072]': 0.4,
            '[0.131072, infinity]': 1.6,
          },
          pricingParams: ['textInput'],
        },
        name: 'textInput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
      {
        lookup: {
          prices: {
            '[0, 0.131072]': 3.2,
            '[0.131072, infinity]': 12.8,
          },
          // output tier keyed by input size ('textInput')
          pricingParams: ['textInput'],
        },
        name: 'textOutput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
    ],
  },
  releasedAt: '2026-03-03',
  settings: {
    extendParams: ['enableReasoning', 'reasoningBudgetToken'],
  },
  type: 'chat',
},
{
  // Qwen3.5 27B — dense multimodal model; tiered CNY pricing at the 128K input boundary.
  abilities: {
    functionCall: true,
    reasoning: true,
    // alphabetical key order ('video' before 'vision'), consistent with the rest of the file
    video: true,
    vision: true,
  },
  contextWindowTokens: 262_144,
  description:
    'Qwen3.5-27B is a native multimodal large language model developed by the Qwen team with 27B parameters. The model employs a highly efficient hybrid architecture combining Gated Delta Networks with Gated Attention. It natively supports 256K context length, extendable to approximately 1 million tokens. Through early fusion training, the model achieves unified vision-language foundational capabilities, supporting text, image, and video understanding. It delivers excellent performance across multiple benchmarks including reasoning, coding, agents, and visual understanding, surpassing Qwen3-235B-A22B and GPT-5-mini on several metrics. The model has Thinking Mode enabled by default, supports tool calling, and covers 201 languages and dialects.',
  displayName: 'Qwen3.5 27B',
  id: 'qwen/qwen3.5-27b',
  pricing: {
    currency: 'CNY',
    units: [
      {
        lookup: {
          prices: {
            // 0.131072 million tokens = 131,072 tokens = 128K input threshold
            '[0, 0.131072]': 0.6,
            '[0.131072, infinity]': 1.8,
          },
          pricingParams: ['textInput'],
        },
        name: 'textInput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
      {
        lookup: {
          prices: {
            '[0, 0.131072]': 4.8,
            '[0.131072, infinity]': 14.4,
          },
          // output tier keyed by input size ('textInput')
          pricingParams: ['textInput'],
        },
        name: 'textOutput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
    ],
  },
  releasedAt: '2026-03-03',
  settings: {
    extendParams: ['enableReasoning', 'reasoningBudgetToken'],
  },
  type: 'chat',
},
{
  // Qwen3.5 9B — lightweight dense multimodal model; tiered CNY pricing at the 128K input boundary.
  abilities: {
    functionCall: true,
    reasoning: true,
    // alphabetical key order ('video' before 'vision'), consistent with the rest of the file
    video: true,
    vision: true,
  },
  contextWindowTokens: 262_144,
  description:
    'Qwen3.5-9B is a native multimodal large language model developed by the Qwen team with 9B parameters. As the lightweight Dense model in the Qwen3.5 series, it employs a highly efficient hybrid architecture combining Gated Delta Networks with Gated Attention. It natively supports 256K context length, extendable to approximately 1 million tokens. Through early fusion training, the model achieves unified vision-language foundational capabilities, supporting text, image, and video understanding. The model has Thinking Mode enabled by default, supports tool calling, and covers 201 languages and dialects.',
  displayName: 'Qwen3.5 9B',
  id: 'qwen/qwen3.5-9b',
  pricing: {
    currency: 'CNY',
    units: [
      {
        lookup: {
          prices: {
            // 0.131072 million tokens = 131,072 tokens = 128K input threshold
            '[0, 0.131072]': 0.5,
            '[0.131072, infinity]': 1.5,
          },
          pricingParams: ['textInput'],
        },
        name: 'textInput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
      {
        lookup: {
          prices: {
            '[0, 0.131072]': 4.0,
            '[0.131072, infinity]': 12.0,
          },
          // output tier keyed by input size ('textInput')
          pricingParams: ['textInput'],
        },
        name: 'textOutput',
        strategy: 'lookup',
        unit: 'millionTokens',
      },
    ],
  },
  releasedAt: '2026-03-03',
  settings: {
    extendParams: ['enableReasoning', 'reasoningBudgetToken'],
  },
  type: 'chat',
},
{
  // Qwen3.5 4B — lightest dense model in the series; fixed rate 0 = currently free of charge.
  abilities: {
    functionCall: true,
    reasoning: true,
    // alphabetical key order ('video' before 'vision'), consistent with the rest of the file
    video: true,
    vision: true,
  },
  contextWindowTokens: 262_144,
  description:
    'Qwen3.5-4B is a native multimodal large language model developed by the Qwen team with 4B parameters, making it the lightest Dense model in the Qwen3.5 series. The model employs a highly efficient hybrid architecture combining Gated Delta Networks with Gated Attention. It natively supports 256K context length, extendable to approximately 1 million tokens. Through early fusion training, the model achieves unified vision-language foundational capabilities, supporting text, image, and video understanding. It delivers excellent performance among models of similar size, surpassing GPT-5-Nano and Gemini-2.5-Flash-Lite on several metrics. The model has Thinking Mode enabled by default, supports tool calling, and covers 201 languages and dialects.',
  displayName: 'Qwen3.5 4B',
  id: 'qwen/qwen3.5-4b',
  pricing: {
    currency: 'CNY',
    units: [
      // flat pricing (no input-length tiers), unlike the larger Qwen3.5 entries above
      { name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
    ],
  },
  releasedAt: '2026-03-03',
  settings: {
    extendParams: ['enableReasoning', 'reasoningBudgetToken'],
  },
  type: 'chat',
},
{
abilities: {
vision: true,