💄 style: Add MiniMax-M2 model (#9897)

*  feat: 更新多个模型的描述、定价和能力,添加新模型支持

*  fix: 处理 <think> 标签的内容分割,优化思考状态管理

*  test: 添加测试以处理包含 </think> 标签的内容分割

* fix test
This commit is contained in:
sxjeru
2025-10-27 17:39:20 +08:00
committed by GitHub
parent ed00394c6e
commit d6fded2973
12 changed files with 325 additions and 99 deletions

View File

@@ -944,14 +944,18 @@ const aihubmixModels: AIChatModelCard[] = [
vision: true,
},
contextWindowTokens: 32_768 + 8192,
description: 'Gemini 2.5 Flash 实验模型,支持图像生成',
description:
'Nano Banana 是 Google 最新、最快、最高效的原生多模态模型,它允许您通过对话生成和编辑图像。',
displayName: 'Nano Banana',
id: 'gemini-2.5-flash-image-preview',
enabled: true,
id: 'gemini-2.5-flash-image',
maxOutput: 8192,
pricing: {
units: [
{ name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 30, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'imageInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'imageOutput', rate: 30, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-08-26',

View File

@@ -7,10 +7,34 @@ const minimaxChatModels: AIChatModelCard[] = [
reasoning: true,
search: true,
},
contextWindowTokens: 1_000_192,
description: '全新自研推理模型。全球领先80K思维链 x 1M输入效果比肩海外顶尖模型。',
displayName: 'MiniMax-M1',
contextWindowTokens: 204_800,
description: '专为高效编码与Agent工作流而生',
displayName: 'MiniMax M2',
enabled: true,
id: 'MiniMax-M2',
maxOutput: 131_072,
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 8.4, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-10-27',
settings: {
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
},
contextWindowTokens: 1_000_192,
description: '全新自研推理模型。全球领先80K 思维链 x 1M 输入,效果比肩海外顶尖模型',
displayName: 'MiniMax M1',
id: 'MiniMax-M1',
maxOutput: 40_000,
pricing: {
@@ -35,8 +59,7 @@ const minimaxChatModels: AIChatModelCard[] = [
contextWindowTokens: 1_000_192,
description:
'在 MiniMax-01系列模型中我们做了大胆创新首次大规模实现线性注意力机制传统 Transformer架构不再是唯一的选择。这个模型的参数量高达4560亿其中单次激活459亿。模型综合性能比肩海外顶尖模型同时能够高效处理全球最长400万token的上下文是GPT-4o的32倍Claude-3.5-Sonnet的20倍。',
displayName: 'MiniMax-Text-01',
enabled: true,
displayName: 'MiniMax Text 01',
id: 'MiniMax-Text-01',
maxOutput: 40_000,
pricing: {

View File

@@ -519,6 +519,7 @@ const novitaChatModels: AIChatModelCard[] = [
contextWindowTokens: 1_048_576,
displayName: 'Llama 4 Maverick 17B Instruct',
id: 'meta-llama/llama-4-maverick-17b-128e-instruct-fp8',
maxOutput: 8192,
pricing: {
units: [
{ name: 'textInput', rate: 0.17, strategy: 'fixed', unit: 'millionTokens' },
@@ -593,19 +594,6 @@ const novitaChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description: 'Gemma 3 1B 是谷歌的一款开源语言模型,以其在效率和性能方面设立了新的标准。',
displayName: 'Gemma 3 1B',
id: 'google/gemma-3-1b-it',
pricing: {
units: [
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
],
},
type: 'chat',
},
{
contextWindowTokens: 60_288,
description: 'Mistral Nemo 是多语言支持和高性能编程的7.3B参数模型。',
@@ -876,18 +864,6 @@ const novitaChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 131_000,
displayName: 'Llama 3.2 1B Instruct',
id: 'meta-llama/llama-3.2-1b-instruct',
pricing: {
units: [
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
],
},
type: 'chat',
},
{
abilities: {
functionCall: true,

View File

@@ -2,6 +2,149 @@ import { AIChatModelCard, AIImageModelCard } from '../types/aiModel';
// https://siliconflow.cn/zh-cn/models
const siliconcloudChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
vision: true,
},
contextWindowTokens: 262_144,
description:
'Qwen3-VL-32B-Instruct 是阿里巴巴通义千问团队推出的视觉语言模型,在多个视觉语言基准测试中取得了领先的 SOTA 性能。该模型支持百万像素级别的高分辨率图像输入,并具备强大的通用视觉理解、多语言 OCR、细粒度视觉定位和视觉对话能力。作为 Qwen3 系列中的视觉语言模型,它能够处理复杂的多模态任务,支持工具调用和前缀续写等高级功能。',
displayName: 'Qwen3 VL 32B Instruct',
id: 'Qwen/Qwen3-VL-32B-Instruct',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 4, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-10-21',
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
vision: true,
},
contextWindowTokens: 262_144,
description:
'Qwen3-VL-32B-Thinking 是阿里巴巴通义千问团队推出的视觉语言模型中一个为复杂视觉推理任务特别优化的版本。该模型内置了"思考模式",使其在回答问题前能够生成详细的中间推理步骤,从而显著增强其在需要多步逻辑、规划和复杂推理的任务上的表现。该模型支持百万像素级别的高分辨率图像输入,具备强大的通用视觉理解、多语言 OCR、细粒度视觉定位和视觉对话能力并支持工具调用和前缀续写等功能。',
displayName: 'Qwen3 VL 32B Thinking',
id: 'Qwen/Qwen3-VL-32B-Thinking',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-10-21',
settings: {
extendParams: ['reasoningBudgetToken'],
},
type: 'chat',
},
{
abilities: {
vision: true,
},
contextWindowTokens: 8_192,
description:
'DeepSeek-OCR 是由深度求索DeepSeek AI推出的一个视觉语言模型专注于光学字符识别OCR与"上下文光学压缩"。该模型旨在探索从图像中压缩上下文信息的边界,能够高效处理文档并将其转换为如 Markdown 等结构化文本格式。它能够准确识别图像中的文字内容,特别适用于文档数字化、文字提取和结构化处理等应用场景。',
displayName: 'DeepSeek OCR',
id: 'deepseek-ai/DeepSeek-OCR',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-10-20',
type: 'chat',
},
{
abilities: {
functionCall: true,
vision: true,
},
contextWindowTokens: 65_536,
description:
'Qwen3-Omni-30B-A3B-Instruct 是阿里巴巴通义千问团队最新 Qwen3 系列中的一员。它是一个拥有 300 亿总参数和 30 亿激活参数的混合专家MoE模型在保持强大性能的同时有效降低了推理成本。该模型在高质量、多来源、多语言的数据上进行训练具备强大的通用能力支持全模态输入处理包括文本、图像、音频和视频能够理解和生成跨模态的内容。',
displayName: 'Qwen3 Omni 30B A3B Instruct',
id: 'Qwen/Qwen3-Omni-30B-A3B-Instruct',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 0.7, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 2.8, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-09-22',
type: 'chat',
},
{
abilities: {
functionCall: true,
reasoning: true,
vision: true,
},
contextWindowTokens: 65_536,
description:
'Qwen3-Omni-30B-A3B-Thinking 是 Qwen3-Omni 全模态模型中的核心"思考者"Thinker组件。它专门负责处理包括文本、音频、图像和视频在内的多模态输入并执行复杂的思维链推理。作为推理的大脑该模型将所有输入统一到通用的表征空间中实现跨模态的深度理解和复杂推理能力。该模型基于混合专家MoE架构拥有 300 亿总参数和 30 亿激活参数,能够在保持强大推理能力的同时优化计算效率。',
displayName: 'Qwen3 Omni 30B A3B Thinking',
id: 'Qwen/Qwen3-Omni-30B-A3B-Thinking',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 0.7, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 2.8, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-09-22',
settings: {
extendParams: ['reasoningBudgetToken'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,
vision: true,
},
contextWindowTokens: 65_536,
description:
'Qwen3-Omni-30B-A3B-Captioner 是阿里巴巴通义千问团队 Qwen3 系列中的一款视觉语言模型VLM。它专门用于生成高质量、详细且准确的图像描述。该模型基于 300 亿总参数的混合专家MoE架构能够深入理解图像内容并将其转化为自然流畅的文字描述。它在图像细节捕捉、场景理解、物体识别和关系推理等方面表现卓越特别适合需要精确图像理解和描述生成的应用场景。',
displayName: 'Qwen3 Omni 30B A3B Captioner',
id: 'Qwen/Qwen3-Omni-30B-A3B-Captioner',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 0.7, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 2.8, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-09-22',
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
'混元翻译模型Hunyuan Translation Model由一个翻译模型 Hunyuan-MT-7B 和一个集成模型 Hunyuan-MT-Chimera 组成。Hunyuan-MT-7B 是一个拥有 70 亿参数的轻量级翻译模型,用于将源文本翻译成目标语言。该模型支持 33 种语言以及 5 种中国少数民族语言的互译。在 WMT25 国际机器翻译竞赛中Hunyuan-MT-7B 在其参与的 31 个语言类别中获得了 30 个第一名,展现了其卓越的翻译能力。针对翻译场景,腾讯混元提出了一个从预训练到监督微调、再到翻译强化和集成强化的完整训练范式,使其在同等规模的模型中达到了业界领先的性能。该模型计算效率高、易于部署,适合多种应用场景。',
displayName: 'Hunyuan MT 7B',
id: 'tencent/Hunyuan-MT-7B',
pricing: {
currency: 'CNY',
units: [
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2025-09-01',
type: 'chat',
},
{
abilities: {
functionCall: true,

View File

@@ -1214,6 +1214,86 @@ describe('OpenAIStream', () => {
);
});
it('should split reasoning and text when </think> and content are in the same chunk', async () => {
  // Three upstream deltas: one opening the think block, one plain fragment, and
  // a last one where the closing tag is directly followed by answer text.
  const deltaContents = ['<think>思考开始', '思考进行中', '思考结束</think>这是最终回答'];

  // Every chunk shares the same envelope; only `created` and the delta text vary.
  const data = deltaContents.map((content, offset) => ({
    id: '1',
    object: 'chat.completion.chunk',
    created: 1737563070 + offset,
    model: 'minimax-m2',
    choices: [
      {
        index: 0,
        delta: { content },
        finish_reason: null,
        logprobs: null,
      },
    ],
  }));

  const mockOpenAIStream = new ReadableStream({
    start(controller) {
      for (const item of data) {
        controller.enqueue(item);
      }
      controller.close();
    },
  });

  const protocolStream = OpenAIStream(mockOpenAIStream);

  // Drain the protocol stream, decoding each emitted piece to a string.
  const decoder = new TextDecoder();
  const received: string[] = [];
  // @ts-ignore
  for await (const piece of protocolStream) {
    received.push(decoder.decode(piece, { stream: true }));
  }

  // Expected events in order: the text before </think> stays `reasoning`,
  // the text after it in the same delta is emitted as `text`.
  const expectedEvents = [
    ['reasoning', '思考开始'],
    ['reasoning', '思考进行中'],
    ['reasoning', '思考结束'],
    ['text', '这是最终回答'],
  ];
  const expected = expectedEvents.flatMap(([event, payload]) => [
    'id: 1\n',
    `event: ${event}\n`,
    `data: "${payload}"\n\n`,
  ]);

  expect(received).toEqual(expected);
});
it('should handle reasoning event in official DeepSeek api', async () => {
const data = [
{

View File

@@ -300,22 +300,52 @@ const transformOpenAIStream = (
}
if (typeof content === 'string') {
// 清除 <think> 及 </think> 标签
const thinkingContent = content.replaceAll(/<\/?think>/g, '');
// 判断是否有 <think> 或 </think> 标签,更新 thinkingInContent 状态
if (content.includes('<think>')) {
streamContext.thinkingInContent = true;
} else if (content.includes('</think>')) {
streamContext.thinkingInContent = false;
}
// 如果 content 是空字符串但 chunk 带有 usage,则优先返回 usage(例如 Gemini image-preview 最终会在单独的 chunk 中返回 usage)
if (content === '' && chunk.usage) {
const usage = chunk.usage;
return { data: convertOpenAIUsage(usage, payload), id: chunk.id, type: 'usage' };
}
// 处理包含 </think> 标签的特殊情况:需要分割内容
if (content.includes('</think>')) {
const parts = content.split('</think>');
const beforeThink = parts[0].replaceAll('<think>', ''); // 移除可能的 <think> 标签
const afterThink = parts.slice(1).join('</think>'); // 处理可能有多个 </think> 的情况
const results: StreamProtocolChunk[] = [];
// </think> 之前的内容(如果有)作为 reasoning
if (beforeThink) {
results.push({
data: beforeThink,
id: chunk.id,
type: 'reasoning',
});
}
// 更新状态:已经结束思考模式
streamContext.thinkingInContent = false;
// </think> 之后的内容(如果有)作为 text
if (afterThink) {
results.push({
data: afterThink,
id: chunk.id,
type: 'text',
});
}
return results.length > 0 ? results : { data: '', id: chunk.id, type: 'text' };
}
// 清除 <think> 标签(不需要分割,因为 <think> 标签后续内容都是 reasoning)
const thinkingContent = content.replaceAll(/<\/?think>/g, '');
// 判断是否有 <think> 标签,更新 thinkingInContent 状态
if (content.includes('<think>')) {
streamContext.thinkingInContent = true;
}
// 判断是否有 citations 内容,更新 returnedCitation 状态
if (!streamContext?.returnedCitation) {
const citations =

View File

@@ -5,7 +5,7 @@ import { testProvider } from '../../providerTestUtils';
import { LobeMinimaxAI } from './index';
const provider = ModelProvider.Minimax;
const defaultBaseURL = 'https://api.minimax.chat/v1';
const defaultBaseURL = 'https://api.minimaxi.com/v1';
testProvider({
Runtime: LobeMinimaxAI,

View File

@@ -10,18 +10,18 @@ export const getMinimaxMaxOutputs = (modelId: string): number | undefined => {
};
export const LobeMinimaxAI = createOpenAICompatibleRuntime({
baseURL: 'https://api.minimax.chat/v1',
baseURL: 'https://api.minimaxi.com/v1',
chatCompletion: {
handlePayload: (payload) => {
const { enabledSearch, max_tokens, temperature, tools, top_p, ...params } = payload;
const minimaxTools = enabledSearch
? [
...(tools || []),
{
type: 'web_search',
},
]
...(tools || []),
{
type: 'web_search',
},
]
: tools;
// Resolve parameters with constraints
@@ -33,7 +33,7 @@ export const LobeMinimaxAI = createOpenAICompatibleRuntime({
},
{
normalizeTemperature: true,
topPRange: { max: 1, min: 0 },
topPRange: { max: 1, min: 0.01 },
},
);

View File

@@ -6,7 +6,7 @@ import { testProvider } from '../../providerTestUtils';
import { LobeQiniuAI, params } from './index';
const provider = ModelProvider.Qiniu;
const defaultBaseURL = 'https://api.qnaigc.com/v1';
const defaultBaseURL = 'https://openai.qiniu.com/v1';
testProvider({
Runtime: LobeQiniuAI,
@@ -275,7 +275,7 @@ describe('LobeQiniuAI - custom features', () => {
it('should export params object', () => {
expect(params).toBeDefined();
expect(params.provider).toBe(ModelProvider.Qiniu);
expect(params.baseURL).toBe('https://api.qnaigc.com/v1');
expect(params.baseURL).toBe('https://openai.qiniu.com/v1');
});
it('should export LobeQiniuAI class', () => {

View File

@@ -6,19 +6,18 @@ import {
} from '../../core/openaiCompatibleFactory';
import { processMultiProviderModelList } from '../../utils/modelParse';
export interface QiniuModelCard {
id: string;
}
export const params = {
apiKey: 'placeholder-to-avoid-error',
baseURL: 'https://api.qnaigc.com/v1',
baseURL: 'https://openai.qiniu.com/v1',
debug: {
chatCompletion: () => process.env.DEBUG_QINIU_CHAT_COMPLETION === '1',
},
models: async ({ client }) => {
const modelsPage = (await client.models.list()) as any;
const modelList: QiniuModelCard[] = modelsPage.data;
const modelList = modelsPage.data.map((model: any) => {
const { created, ...rest } = model;
return rest;
});
// 自动检测模型提供商并选择相应配置
return processMultiProviderModelList(modelList, 'qiniu');

View File

@@ -32,6 +32,7 @@ export const MODEL_LIST_CONFIGS = {
deepseek: {
functionCallKeywords: ['v3', 'r1', 'deepseek-chat'],
reasoningKeywords: ['r1', 'deepseek-reasoner', 'v3.1', 'v3.2'],
visionKeywords: ['ocr'],
},
google: {
excludeKeywords: ['tts'],
@@ -57,6 +58,11 @@ export const MODEL_LIST_CONFIGS = {
reasoningKeywords: ['thinking'],
visionKeywords: [],
},
minimax: {
functionCallKeywords: ['minimax'],
reasoningKeywords: ['-m'],
visionKeywords: ['-vl', 'Text-01'],
},
moonshot: {
functionCallKeywords: ['moonshot', 'kimi'],
reasoningKeywords: ['thinking'],
@@ -80,7 +86,7 @@ export const MODEL_LIST_CONFIGS = {
'qwen3',
],
reasoningKeywords: ['qvq', 'qwq', 'qwen3', '!-instruct-', '!-coder-', '!-max-'],
visionKeywords: ['qvq', 'vl'],
visionKeywords: ['qvq', '-vl', '-omni'],
},
v0: {
functionCallKeywords: ['v0'],
@@ -117,6 +123,7 @@ export const MODEL_OWNER_DETECTION_CONFIG = {
inclusionai: ['ling-', 'ming-', 'ring-'],
llama: ['llama', 'llava'],
longcat: ['longcat'],
minimax: ['minimax'],
moonshot: ['moonshot', 'kimi'],
openai: ['o1', 'o3', 'o4', 'gpt-'],
qwen: ['qwen', 'qwq', 'qvq'],

View File

@@ -2,51 +2,15 @@ import { ModelProviderCard } from '@/types/llm';
// ref: https://platform.minimaxi.com/document/Models
const Minimax: ModelProviderCard = {
chatModels: [
{
contextWindowTokens: 245_760,
description: '适用于广泛的自然语言处理任务,包括文本生成、对话系统等。',
displayName: 'abab6.5s',
enabled: true,
functionCall: true,
id: 'abab6.5s-chat',
},
{
contextWindowTokens: 8192,
description: '专为多语种人设对话设计,支持英文及其他多种语言的高质量对话生成。',
displayName: 'abab6.5g',
enabled: true,
functionCall: true,
id: 'abab6.5g-chat',
},
{
contextWindowTokens: 8192,
description: '针对中文人设对话场景优化,提供流畅且符合中文表达习惯的对话生成能力。',
displayName: 'abab6.5t',
enabled: true,
functionCall: true,
id: 'abab6.5t-chat',
},
{
contextWindowTokens: 16_384,
description: '面向生产力场景,支持复杂任务处理和高效文本生成,适用于专业领域应用。',
displayName: 'abab5.5',
id: 'abab5.5-chat',
},
{
contextWindowTokens: 8192,
description: '专为中文人设对话场景设计,提供高质量的中文对话生成能力,适用于多种应用场景。',
displayName: 'abab5.5s',
id: 'abab5.5s-chat',
},
],
checkModel: 'abab6.5s-chat',
chatModels: [],
checkModel: 'MiniMax-M2',
description:
'MiniMax 是 2021 年成立的通用人工智能科技公司致力于与用户共创智能。MiniMax 自主研发了不同模态的通用大模型,其中包括万亿参数的 MoE 文本大模型、语音大模型以及图像大模型。并推出了海螺 AI 等应用。',
id: 'minimax',
modelsUrl: 'https://platform.minimaxi.com/document/Models',
name: 'Minimax',
settings: {
disableBrowserRequest: true, // CORS error
proxyUrl: {
placeholder: 'https://api.minimax.chat/v1',
},