💄 style: add mimo-v2-pro & mimo-v2-omni support (#13123)

This commit is contained in:
Zhijie He
2026-03-19 22:14:20 +08:00
committed by GitHub
parent 80cb6c9d11
commit e2d25be729
6 changed files with 456 additions and 36 deletions

View File

@@ -5,15 +5,118 @@ const xiaomimimoChatModels: AIChatModelCard[] = [
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
},
contextWindowTokens: 1_000_000,
description: 'Xiaomi MiMo-V2-Pro is specifically designed for high-intensity agent workflows in real-world scenarios. It features over 1 trillion total parameters (42B activated parameters), adopts an innovative hybrid attention architecture, and supports an ultra-long context length of up to 1 million tokens. Built on a powerful foundational model, we continuously scale computational resources across a broader range of agent scenarios, further expanding the action space of intelligence and achieving significant generalization—from coding to real-world task execution (“claw”).',
displayName: 'MiMo-V2 Pro',
enabled: true,
id: 'mimo-v2-pro',
maxOutput: 131_072,
pricing: {
currency: 'CNY',
units: [
{
lookup: {
prices: {
'[0, 0.256]': 7,
'[0.256, infinity]': 14,
},
pricingParams: ['textInput'],
},
name: 'textInput',
strategy: 'lookup',
unit: 'millionTokens',
},
{
lookup: {
prices: {
'[0, 0.256]': 1.4,
'[0.256, infinity]': 2.8,
},
pricingParams: ['textInput'],
},
name: 'textInput_cacheRead',
strategy: 'lookup',
unit: 'millionTokens',
},
{
lookup: {
prices: {
'[0, 0.256]': 21,
'[0.256, infinity]': 42,
},
pricingParams: ['textInput'],
},
name: 'textOutput',
strategy: 'lookup',
unit: 'millionTokens',
},
],
},
releasedAt: '2026-03-18',
settings: {
extendParams: ['enableReasoning'],
searchImpl: 'params',
},
type: 'chat',
},
// MiMo-V2-Omni model card — full-modality chat model (vision + video input)
// with reasoning, web search and function-call support; fixed per-million-token
// pricing in CNY. (A stale duplicate `description` key carrying the old
// MiMo-V2-Flash text was removed — duplicate keys in an object literal are a
// TS error and the first value is dead.)
{
  abilities: {
    functionCall: true,
    reasoning: true,
    search: true,
    structuredOutput: true,
    video: true,
    vision: true,
  },
  contextWindowTokens: 262_144,
  description: 'MiMo-V2-Omni is purpose-built for complex multimodal interaction and execution scenarios in the real world. We constructed a full-modality foundation from the ground up, integrating text, vision, and speech, and unified “perception” and “action” within a single architecture. This not only breaks the traditional limitation of models that emphasize understanding over execution, but also endows the model with native capabilities in multimodal perception, tool usage, function execution, and GUI operations. MiMo-V2-Omni can seamlessly integrate with major agent frameworks, achieving a leap from understanding to control while significantly lowering the barrier to deploying fully multimodal agents.',
  displayName: 'MiMo-V2 Omni',
  enabled: true,
  id: 'mimo-v2-omni',
  maxOutput: 131_072,
  pricing: {
    currency: 'CNY',
    units: [
      { name: 'textInput', rate: 2.8, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textInput_cacheRead', rate: 0.56, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textOutput', rate: 14, strategy: 'fixed', unit: 'millionTokens' },
    ],
  },
  releasedAt: '2026-03-18',
  settings: {
    extendParams: ['enableReasoning'],
    searchImpl: 'params',
  },
  type: 'chat',
},
// MiMo-V2-Flash model card — text-only MoE model focused on inference
// efficiency; reasoning, web search and function-call support; fixed
// per-million-token pricing in CNY. (A stale duplicate `maxOutput` key was
// removed — duplicate keys in an object literal are a TS error; the updated
// value 65_536 is kept.)
{
  abilities: {
    functionCall: true,
    reasoning: true,
    search: true,
    structuredOutput: true,
  },
  contextWindowTokens: 262_144,
  description: 'MiMo-V2-Flash is now officially open source! This is a MoE (Mixture-of-Experts) model purpose-built for extreme inference efficiency, with 309B total parameters (15B activated). Through innovations in a hybrid attention architecture and multi-layer MTP inference acceleration, it ranks among the global Top 2 open-source models across multiple agent benchmarking suites. Its coding capabilities surpass all open-source models and rival leading closed-source models such as Claude 4.5 Sonnet, while incurring only 2.5% of the inference cost and delivering 2× faster generation speed—pushing large-model inference efficiency to the limit.',
  displayName: 'MiMo-V2 Flash',
  enabled: true,
  id: 'mimo-v2-flash',
  maxOutput: 65_536,
  pricing: {
    currency: 'CNY',
    units: [
      { name: 'textInput', rate: 0.7, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textInput_cacheRead', rate: 0.07, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textOutput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' },
    ],
  },
  releasedAt: '2026-03-03',
  settings: {
    extendParams: ['enableReasoning'],
    searchImpl: 'params',
  },
  type: 'chat',
},

View File

@@ -3312,6 +3312,98 @@ describe('OpenAIStream', () => {
]);
});
// XiaomiMiMo returns the full `annotations` array (url_citation items) in the
// FIRST streamed chunk; the stream transformer must emit it once as a single
// `grounding` event, and annotations repeated on later chunks must be ignored
// so subsequent chunks produce only `text` events.
it('should handle XiaomiMiMo annotations', async () => {
const mockOpenAIStream = new ReadableStream({
start(controller) {
// First chunk: empty content plus the full annotations list — this is the
// chunk that must become the single `grounding` event.
controller.enqueue({
id: 'mimo-v2-omni',
choices: [
{
index: 0,
delta: {
role: 'assistant',
content: '',
annotations: [
{
type: 'url_citation',
url: 'https://biz.finance.sina.com.cn/usstock/usstock_news.php?symbol=ZNH',
title: '南方航空相关新闻_美股 - 新浪财经',
site_name: 'biz.finance.sina.com.cn',
summary:
'(ZNH) · 格隆汇 APP | 2026 年 03 月 19 日 11:09 港股异动丨航空股跌势不止成本压力巨大国内航司集体上调燃油附加费',
},
],
},
},
],
});
// Second chunk: text content with the annotations repeated — these repeated
// annotations must NOT produce another grounding event, only a text event.
controller.enqueue({
id: 'mimo-v2-omni',
choices: [
{
index: 0,
delta: {
role: 'assistant',
content: 'Some response text with annotations, should has no annotations',
annotations: [
{
type: 'url_citation',
url: 'https://biz.finance.sina.com.cn/usstock/usstock_news.php?symbol=ZNH',
title: '南方航空相关新闻_美股 - 新浪财经',
site_name: 'biz.finance.sina.com.cn',
summary:
'(ZNH) · 格隆汇 APP | 2026 年 03 月 19 日 11:09 港股异动丨航空股跌势不止成本压力巨大国内航司集体上调燃油附加费',
},
],
},
},
],
});
// Third chunk with finish_reason
controller.enqueue({
id: 'mimo-v2-omni',
choices: [
{
index: 0,
delta: {
content: 'Some response text',
},
finish_reason: 'stop',
},
],
});
controller.close();
},
});
const protocolStream = OpenAIStream(mockOpenAIStream);
const decoder = new TextDecoder();
const chunks = [];
// @ts-ignore
for await (const chunk of protocolStream) {
chunks.push(decoder.decode(chunk, { stream: true }));
}
// Should emit grounding event from first chunk, then text from second chunk
// (annotations dropped), then text from the final chunk. Note the grounding
// payload keeps only {title, url} from each annotation.
expect(chunks).toEqual([
'id: mimo-v2-omni\n',
'event: grounding\n',
`data: {"citations":[{"title":"南方航空相关新闻_美股 - 新浪财经","url":"https://biz.finance.sina.com.cn/usstock/usstock_news.php?symbol=ZNH"}]}\n\n`,
'id: mimo-v2-omni\n',
'event: text\n',
`data: "Some response text with annotations, should has no annotations"\n\n`,
'id: mimo-v2-omni\n',
'event: text\n',
`data: "Some response text"\n\n`,
]);
});
it('should handle MiniMax messages with annotations in finish_reason', async () => {
const mockOpenAIStream = new ReadableStream({
start(controller) {

View File

@@ -46,7 +46,6 @@ const processMarkdownBase64Images = (text: string): { cleanedText: string; urls:
const urls: string[] = [];
const mdRegex = /!\[[^\]]*\]\(\s*(data:image\/[\d+.A-Za-z-]+;base64,[^\s)]+)\s*\)/g;
let cleanedText = text;
let m: RegExpExecArray | null;
// Reset regex lastIndex to ensure we start from the beginning
@@ -57,7 +56,7 @@ const processMarkdownBase64Images = (text: string): { cleanedText: string; urls:
}
// Remove all markdown base64 image segments
cleanedText = text.replaceAll(mdRegex, '').trim();
const cleanedText = text.replaceAll(mdRegex, '').trim();
return { cleanedText, urls };
};
@@ -67,6 +66,12 @@ const transformOpenAIStream = (
streamContext: StreamContext,
payload?: ChatPayloadForTransformStream,
): StreamProtocolChunk | StreamProtocolChunk[] => {
if (streamContext.chunkIndex === undefined) {
streamContext.chunkIndex = 0;
} else {
streamContext.chunkIndex++;
}
// handle the first chunk error
if (FIRST_CHUNK_ERROR_KEY in chunk) {
delete chunk[FIRST_CHUNK_ERROR_KEY];
@@ -362,6 +367,33 @@ const transformOpenAIStream = (
return { data: item.finish_reason, id: chunk.id, type: 'stop' };
}
// XiaomiMiMo will return full annotations in the first chunk
// {"id":"65b10aeecba14877b4cd282d4e32f203","object":"chat.completion.chunk","created":1773907177,"model":"mimo-v2-omni","choices":[{"index":0,"delta":{"annotations":[{"site_name":"biz.finance.sina.com.cn","summary":"(ZNH) · 格隆汇 APP | 2026 年 03 月 19 日 11:09 港股异动丨航空股跌势不止成本压力巨大国内航司集体上调燃油附加费 · 每日经济新闻 | 2026 年 03 月 19 日 09:55 港股航空股再度走低南方 ...","title":"南方航空相关新闻_美股 - 新浪财经","type":"url_citation","url":"https://biz.finance.sina.com.cn/usstock/usstock_news.php?symbol=ZNH"}],"role":"assistant","content":""}}],"usage":{"web_search_usage":{"tool_usage":5,"page_usage":20}}}
if (
streamContext.chunkIndex === 0 &&
(item as any).delta &&
Array.isArray((item as any).delta.annotations) &&
(item as any).delta.annotations.length > 0
) {
const citations = (item as any).delta.annotations;
return [
{
data: {
citations: citations.map(
(item: any) =>
({
title: item.title,
url: item.url,
}) as ChatCitationItem,
),
},
id: chunk.id,
type: 'grounding',
},
];
}
if (item.delta) {
let reasoning_content = (() => {
if ('reasoning_content' in item.delta) return item.delta.reasoning_content;

View File

@@ -19,6 +19,7 @@ export type ChatPayloadForTransformStream = {
* context in the stream to save temporarily data
*/
export interface StreamContext {
chunkIndex?: number;
id: string;
/**
* As pplx citations is in every chunk, but we only need to return it once

View File

@@ -1,6 +1,6 @@
// @vitest-environment node
import { ModelProvider } from 'model-bank';
import { describe, expect, it, vi } from 'vitest';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { testProvider } from '../../providerTestUtils';
import { LobeXiaomiMiMoAI, params } from './index';
@@ -13,7 +13,7 @@ testProvider({
provider,
defaultBaseURL,
chatDebugEnv: 'DEBUG_XIAOMIMIMO_CHAT_COMPLETION',
chatModel: 'gpt-4o',
chatModel: 'mimo-v2-flash',
test: {
skipAPICall: true,
},
@@ -24,7 +24,7 @@ describe('LobeXiaomiMiMoAI - custom features', () => {
it('should map max_tokens to max_completion_tokens', () => {
const payload = {
max_tokens: 1000,
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const result = params.chatCompletion!.handlePayload!(payload as any);
@@ -35,7 +35,7 @@ describe('LobeXiaomiMiMoAI - custom features', () => {
it('should set stream to true by default', () => {
const payload = {
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const result = params.chatCompletion!.handlePayload!(payload as any);
@@ -45,7 +45,7 @@ describe('LobeXiaomiMiMoAI - custom features', () => {
it('should preserve existing stream value', () => {
const payload = {
model: 'gpt-4o',
model: 'mimo-v2-flash',
stream: false,
};
@@ -57,21 +57,21 @@ describe('LobeXiaomiMiMoAI - custom features', () => {
it('should clamp temperature between 0 and 1.5', () => {
const payloadLow = {
temperature: -1,
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultLow = params.chatCompletion!.handlePayload!(payloadLow as any);
expect(resultLow.temperature).toBe(0);
const payloadHigh = {
temperature: 2,
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultHigh = params.chatCompletion!.handlePayload!(payloadHigh as any);
expect(resultHigh.temperature).toBe(1.5);
const payloadNormal = {
temperature: 0.7,
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultNormal = params.chatCompletion!.handlePayload!(payloadNormal as any);
expect(resultNormal.temperature).toBe(0.7);
@@ -80,21 +80,21 @@ describe('LobeXiaomiMiMoAI - custom features', () => {
it('should clamp top_p between 0.01 and 1', () => {
const payloadLow = {
top_p: 0,
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultLow = params.chatCompletion!.handlePayload!(payloadLow as any);
expect(resultLow.top_p).toBe(0.01);
const payloadHigh = {
top_p: 1.5,
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultHigh = params.chatCompletion!.handlePayload!(payloadHigh as any);
expect(resultHigh.top_p).toBe(1);
const payloadNormal = {
top_p: 0.5,
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultNormal = params.chatCompletion!.handlePayload!(payloadNormal as any);
expect(resultNormal.top_p).toBe(0.5);
@@ -103,45 +103,193 @@ describe('LobeXiaomiMiMoAI - custom features', () => {
it('should handle thinking type enabled/disabled', () => {
const payloadEnabled = {
thinking: { type: 'enabled' },
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultEnabled = params.chatCompletion!.handlePayload!(payloadEnabled as any);
expect(resultEnabled.thinking).toEqual({ type: 'enabled' });
const payloadDisabled = {
thinking: { type: 'disabled' },
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultDisabled = params.chatCompletion!.handlePayload!(payloadDisabled as any);
expect(resultDisabled.thinking).toEqual({ type: 'disabled' });
const payloadOther = {
thinking: { type: 'other' },
model: 'gpt-4o',
model: 'mimo-v2-flash',
};
const resultOther = params.chatCompletion!.handlePayload!(payloadOther as any);
expect(resultOther.thinking).toBeUndefined();
});
// Assistant history messages may carry a `reasoning` object ({ content,
// duration }); handlePayload must flatten it into the provider's
// `reasoning_content` string field (dropping `reasoning` itself) when
// thinking is enabled. User messages pass through unchanged.
it('should transform reasoning object to reasoning_content string', () => {
const payload = {
messages: [
{ role: 'user', content: 'Hello' },
{
role: 'assistant',
content: 'Hi there',
reasoning: { content: 'Let me think...', duration: 1000 },
},
{ role: 'user', content: 'How are you?' },
],
model: 'mimo-v2-flash',
thinking: { type: 'enabled' },
};
const result = params.chatCompletion!.handlePayload!(payload as any);
expect(result.messages).toEqual([
{ role: 'user', content: 'Hello' },
{
role: 'assistant',
content: 'Hi there',
// `duration` is dropped; only the content string survives.
reasoning_content: 'Let me think...',
},
{ role: 'user', content: 'How are you?' },
]);
});
// Messages with no `reasoning` field (and thinking not enabled) must pass
// through handlePayload structurally unchanged.
it('should not modify messages without reasoning field', () => {
const payload = {
messages: [
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi there' },
],
model: 'mimo-v2-flash',
};
const result = params.chatCompletion!.handlePayload!(payload as any);
expect(result.messages).toEqual(payload.messages);
});
// With thinking enabled, an assistant message whose `reasoning` object lacks
// a `content` string still gets `reasoning_content`, defaulted to ''.
it('should handle empty reasoning content', () => {
const payload = {
messages: [
{
role: 'assistant',
content: 'Response',
reasoning: { duration: 1000 },
},
],
model: 'mimo-v2-flash',
thinking: { type: 'enabled' },
};
const result = params.chatCompletion!.handlePayload!(payload as any);
expect(result.messages[0]).toEqual({
role: 'assistant',
content: 'Response',
reasoning_content: '',
});
});
});
// Debug flag is driven by the DEBUG_XIAOMIMIMO_CHAT_COMPLETION env var.
// NOTE(review): these tests mutate process.env directly (delete/set/delete);
// they clean up after themselves, but ordering matters if further env-based
// tests are added to this suite.
describe('Debug Configuration', () => {
it('should disable debug by default', () => {
delete process.env.DEBUG_XIAOMIMIMO_CHAT_COMPLETION;
const result = params.debug.chatCompletion();
expect(result).toBe(false);
});
it('should enable debug when env is set', () => {
process.env.DEBUG_XIAOMIMIMO_CHAT_COMPLETION = '1';
const result = params.debug.chatCompletion();
expect(result).toBe(true);
delete process.env.DEBUG_XIAOMIMIMO_CHAT_COMPLETION;
});
});
describe('models', () => {
it('should fetch and process model list', async () => {
const mockModels = [{ id: 'model-1' }, { id: 'model-2' }];
const client = {
models: {
list: vi.fn().mockResolvedValue({ data: mockModels }),
},
};
const mockClient = {
models: {
list: vi.fn(),
},
};
const result = await params.models!({ client: client as any });
beforeEach(() => {
vi.clearAllMocks();
});
expect(client.models.list).toHaveBeenCalled();
expect(result).toEqual(
expect.arrayContaining([
expect.objectContaining({ id: 'model-1' }),
expect.objectContaining({ id: 'model-2' }),
]),
);
// The `models` implementation fetches the provider model list via
// client.models.list() and post-processes it (processModelList merges with the
// known model catalogue). These tests rely on a `mockClient` with a mocked
// `models.list`, reset in the suite's beforeEach.
it('should fetch and process models successfully', async () => {
mockClient.models.list.mockResolvedValue({
data: [{ id: 'mimo-v2-pro' }, { id: 'mimo-v2-flash' }, { id: 'mimo-v2-omni' }],
});
const models = await params.models({ client: mockClient as any });
expect(mockClient.models.list).toHaveBeenCalledTimes(1);
expect(models).toHaveLength(3);
// Order of the API response must be preserved.
expect(models[0].id).toBe('mimo-v2-pro');
expect(models[1].id).toBe('mimo-v2-flash');
expect(models[2].id).toBe('mimo-v2-omni');
});
it('should handle single model', async () => {
mockClient.models.list.mockResolvedValue({
data: [{ id: 'mimo-v2-pro' }],
});
const models = await params.models({ client: mockClient as any });
expect(models).toHaveLength(1);
expect(models[0].id).toBe('mimo-v2-pro');
});
// An empty API response must yield an empty array, not throw.
it('should handle empty model list', async () => {
mockClient.models.list.mockResolvedValue({
data: [],
});
const models = await params.models({ client: mockClient as any });
expect(models).toEqual([]);
});
it('should process models with MODEL_LIST_CONFIGS', async () => {
mockClient.models.list.mockResolvedValue({
data: [{ id: 'mimo-v2-pro' }],
});
const models = await params.models({ client: mockClient as any });
// The processModelList function should merge with known model list
expect(models[0]).toHaveProperty('id');
expect(models[0].id).toBe('mimo-v2-pro');
});
// Extra/unknown fields on API entries must not break processing.
it('should preserve model properties from API response', async () => {
mockClient.models.list.mockResolvedValue({
data: [
{ id: 'mimo-v2-pro', extra_field: 'value' },
{ id: 'mimo-v2-flash', another_field: 123 },
],
});
const models = await params.models({ client: mockClient as any });
expect(models).toHaveLength(2);
expect(models[0].id).toBe('mimo-v2-pro');
expect(models[1].id).toBe('mimo-v2-flash');
});
// Ids not in the known catalogue (e.g. 'mimo-v2-other') still come through.
it('should handle models with different id patterns', async () => {
mockClient.models.list.mockResolvedValue({
data: [
{ id: 'mimo-v2-pro' },
{ id: 'mimo-v2-omni' },
{ id: 'mimo-v2-flash' },
{ id: 'mimo-v2-other' },
],
});
const models = await params.models({ client: mockClient as any });
expect(models).toHaveLength(4);
expect(models.every((m) => typeof m.id === 'string')).toBe(true);
});
});
});

View File

@@ -1,7 +1,8 @@
import { ModelProvider } from 'model-bank';
import { ModelProvider, xiaomimimo as xiaomimimoChatModels } from 'model-bank';
import type { OpenAICompatibleFactoryOptions } from '../../core/openaiCompatibleFactory';
import { createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory';
import { getModelMaxOutputs } from '../../utils/getModelMaxOutputs';
import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
const clamp = (value: number, min: number, max: number) => Math.min(max, Math.max(min, value));
@@ -14,13 +15,56 @@ export const params = {
baseURL: 'https://api.xiaomimimo.com/v1',
chatCompletion: {
handlePayload: (payload) => {
const { thinking, temperature, top_p, max_tokens, stream, ...rest } = payload as any;
const { enabledSearch, thinking, temperature, tools, top_p, max_tokens, stream, ...rest } =
payload as any;
const thinkingType = thinking?.type;
const xiaomiTools = enabledSearch
? [
...(tools || []),
{
type: 'web_search',
},
]
: tools;
const messages = payload.messages?.map((message: any) => {
const { reasoning, ...rest } = message;
const reasoningContent =
typeof rest.reasoning_content === 'string'
? rest.reasoning_content
: typeof reasoning?.content === 'string'
? reasoning.content
: undefined;
// Thinking Mode with tool calls requires assistant history messages to carry reasoning_content
if (message.role === 'assistant' && thinkingType === 'enabled') {
return {
...rest,
reasoning_content: reasoningContent ?? '',
};
}
if (reasoningContent !== undefined) {
return {
...rest,
reasoning_content: reasoningContent,
};
}
return rest;
});
return {
...rest,
max_completion_tokens: max_tokens,
max_completion_tokens:
max_tokens !== undefined
? max_tokens
: getModelMaxOutputs(payload.model, xiaomimimoChatModels),
messages,
stream: stream ?? true,
tools: xiaomiTools,
...(typeof temperature === 'number'
? { temperature: clamp(temperature, 0, 1.5) }
: undefined),