feat: Support Interleaved thinking in MiniMax (#10255)

feat: Enhance LobeMinimaxAI with interleaved thinking and message processing

- Updated LobeMinimaxAI to handle new message structure including reasoning details.
- Added logic to process messages for reasoning content and signatures.
- Resolved parameters with constraints and included reasoning_split in the payload.

test: Update snapshots for NovitaAI, OpenAI, and PPIO models

- Added new models and updated existing model descriptions in snapshots for NovitaAI.
- Updated OpenAI model snapshots to reflect new model additions and descriptions.
- Included new DeepSeek models in PPIO snapshots with detailed descriptions.

fix: Improve error messages for quota and permission issues

- Enhanced error messages for quota limits and permissions to improve clarity and user experience.
This commit is contained in:
sxjeru
2025-11-18 11:19:53 +08:00
committed by GitHub
parent fbcd04696e
commit 13ca8e18c8
9 changed files with 2552 additions and 8 deletions

View File

@@ -51,6 +51,8 @@ export const convertOpenAIMessages = async (messages: OpenAI.ChatCompletionMessa
// it's compatible for DeepSeek
if (msg.reasoning_content !== undefined) result.reasoning_content = msg.reasoning_content;
// MiniMax uses reasoning_details for historical thinking, so forward it unchanged
if (msg.reasoning_details !== undefined) result.reasoning_details = msg.reasoning_details;
return result;
}),

View File

@@ -2317,6 +2317,155 @@ describe('OpenAIStream', () => {
);
});
// Verifies MiniMax M2's `reasoning_details` delta format: an array of
// { type: 'reasoning.text', text } objects must be emitted as `reasoning`
// protocol events, while ordinary `content` deltas still become `text`
// events and the final chunk's usage is forwarded as a `usage` event.
it('should handle reasoning_details array format from MiniMax M2', async () => {
  // Raw chunks as MiniMax streams them: two reasoning deltas, a content
  // delta, an empty content delta, then a stop chunk carrying usage.
  const data = [
    {
      id: '055ccc4cbe1ca0dc18037256237d0823',
      object: 'chat.completion.chunk',
      created: 1762498892,
      model: 'MiniMax-M2',
      choices: [
        {
          index: 0,
          delta: {
            content: '',
            role: 'assistant',
            name: 'MiniMax AI',
            audio_content: '',
            reasoning_details: [
              {
                type: 'reasoning.text',
                id: 'reasoning-text-1',
                format: 'MiniMax-response-v1',
                index: 0,
                text: '中文打招呼说"你好"',
              },
            ],
          },
          finish_reason: null,
        },
      ],
      usage: null,
    },
    {
      id: '055ccc4cbe1ca0dc18037256237d0823',
      object: 'chat.completion.chunk',
      created: 1762498892,
      model: 'MiniMax-M2',
      choices: [
        {
          index: 0,
          delta: {
            reasoning_details: [
              {
                type: 'reasoning.text',
                id: 'reasoning-text-2',
                format: 'MiniMax-response-v1',
                index: 0,
                text: '我需要用中文回复。',
              },
            ],
          },
          finish_reason: null,
        },
      ],
      usage: null,
    },
    {
      id: '055ccc4cbe1ca0dc18037256237d0823',
      object: 'chat.completion.chunk',
      created: 1762498892,
      model: 'MiniMax-M2',
      choices: [
        {
          index: 0,
          delta: {
            content: '你好',
          },
          finish_reason: null,
        },
      ],
      usage: null,
    },
    {
      id: '055ccc4cbe1ca0dc18037256237d0823',
      object: 'chat.completion.chunk',
      created: 1762498892,
      model: 'MiniMax-M2',
      choices: [
        {
          index: 0,
          delta: {
            content: '',
          },
          finish_reason: null,
        },
      ],
      usage: null,
    },
    {
      id: '055ccc4cbe1ca0dc18037256237d0823',
      object: 'chat.completion.chunk',
      created: 1762498892,
      model: 'MiniMax-M2',
      choices: [
        {
          index: 0,
          delta: {
            content: '',
          },
          finish_reason: 'stop',
        },
      ],
      usage: {
        prompt_tokens: 10,
        completion_tokens: 20,
        total_tokens: 30,
      },
    },
  ];
  // Replay the recorded chunks through a ReadableStream, mimicking how the
  // runtime receives them from the OpenAI-compatible SDK.
  const mockOpenAIStream = new ReadableStream({
    start(controller) {
      data.forEach((chunk) => {
        controller.enqueue(chunk);
      });
      controller.close();
    },
  });
  const protocolStream = OpenAIStream(mockOpenAIStream);
  const decoder = new TextDecoder();
  const chunks = [];
  // @ts-ignore
  for await (const chunk of protocolStream) {
    chunks.push(decoder.decode(chunk, { stream: true }));
  }
  // Each reasoning_details entry becomes its own `reasoning` SSE frame;
  // content deltas (including the empty one) become `text` frames, and the
  // usage payload is normalized into the runtime's token-count shape.
  expect(chunks).toEqual(
    [
      'id: 055ccc4cbe1ca0dc18037256237d0823',
      'event: reasoning',
      `data: "中文打招呼说\\"你好\\""\n`,
      'id: 055ccc4cbe1ca0dc18037256237d0823',
      'event: reasoning',
      `data: "我需要用中文回复。"\n`,
      'id: 055ccc4cbe1ca0dc18037256237d0823',
      'event: text',
      `data: "你好"\n`,
      'id: 055ccc4cbe1ca0dc18037256237d0823',
      'event: text',
      `data: ""\n`,
      'id: 055ccc4cbe1ca0dc18037256237d0823',
      'event: usage',
      `data: {"inputTextTokens":10,"outputTextTokens":20,"totalInputTokens":10,"totalOutputTokens":20,"totalTokens":30}\n`,
    ].map((i) => `${i}\n`),
  );
});
it('should handle claude reasoning in litellm openai mode', async () => {
const data = [
{
@@ -3346,5 +3495,166 @@ describe('OpenAIStream', () => {
'chat response streaming chunk parse error, please contact your API Provider to fix it.',
);
});
// MiniMax reports business errors via a `base_resp` object instead of an
// HTTP error status; status_code 1008 must surface as InsufficientQuota
// tagged with the 'minimax' provider.
it('should handle MiniMax base_resp error with insufficient quota (1008)', async () => {
  const quotaErrorChunk = {
    id: 'minimax-error-1008',
    choices: null,
    base_resp: { status_code: 1008, status_msg: 'insufficient balance' },
  };
  const source = new ReadableStream({
    start(controller) {
      controller.enqueue(quotaErrorChunk);
      controller.close();
    },
  });
  const textDecoder = new TextDecoder();
  const received = [];
  // @ts-ignore
  for await (const piece of OpenAIStream(source)) {
    received.push(textDecoder.decode(piece, { stream: true }));
  }
  expect(received[0]).toBe('id: minimax-error-1008\n');
  expect(received[1]).toBe('event: error\n');
  expect(received[2]).toContain('InsufficientQuota');
  expect(received[2]).toContain('insufficient balance');
  expect(received[2]).toContain('minimax');
});
// base_resp status_code 2049 (invalid API key) must be mapped to the
// InvalidProviderAPIKey error type, preserving MiniMax's status message.
it('should handle MiniMax base_resp error with invalid API key (2049)', async () => {
  const stream = new ReadableStream({
    start(controller) {
      controller.enqueue({
        id: 'minimax-error-2049',
        choices: null,
        base_resp: { status_code: 2049, status_msg: 'invalid API Key' },
      });
      controller.close();
    },
  });
  const decoder = new TextDecoder();
  const output = [];
  // @ts-ignore
  for await (const part of OpenAIStream(stream)) {
    output.push(decoder.decode(part, { stream: true }));
  }
  expect(output[0]).toBe('id: minimax-error-2049\n');
  expect(output[1]).toBe('event: error\n');
  expect(output[2]).toContain('InvalidProviderAPIKey');
  expect(output[2]).toContain('invalid API Key');
});
// base_resp status_code 1002 (request frequency exceeded) must be mapped
// to the QuotaLimitReached error type.
it('should handle MiniMax base_resp error with rate limit (1002)', async () => {
  const rateLimitChunk = {
    id: 'minimax-error-1002',
    choices: null,
    base_resp: { status_code: 1002, status_msg: 'request frequency exceeds limit' },
  };
  const upstream = new ReadableStream({
    start(controller) {
      controller.enqueue(rateLimitChunk);
      controller.close();
    },
  });
  const decoder = new TextDecoder();
  const collected = [];
  // @ts-ignore
  for await (const segment of OpenAIStream(upstream)) {
    collected.push(decoder.decode(segment, { stream: true }));
  }
  expect(collected[0]).toBe('id: minimax-error-1002\n');
  expect(collected[1]).toBe('event: error\n');
  expect(collected[2]).toContain('QuotaLimitReached');
  expect(collected[2]).toContain('request frequency exceeds limit');
});
// base_resp status_code 1039 (token limit) must be mapped to the
// ExceededContextWindow error type.
it('should handle MiniMax base_resp error with context window exceeded (1039)', async () => {
  const tokenLimitChunk = {
    id: 'minimax-error-1039',
    choices: null,
    base_resp: { status_code: 1039, status_msg: 'token limit exceeded' },
  };
  const inputStream = new ReadableStream({
    start(controller) {
      controller.enqueue(tokenLimitChunk);
      controller.close();
    },
  });
  const frameDecoder = new TextDecoder();
  const events = [];
  // @ts-ignore
  for await (const frame of OpenAIStream(inputStream)) {
    events.push(frameDecoder.decode(frame, { stream: true }));
  }
  expect(events[0]).toBe('id: minimax-error-1039\n');
  expect(events[1]).toBe('event: error\n');
  expect(events[2]).toContain('ExceededContextWindow');
  expect(events[2]).toContain('token limit exceeded');
});
// An unrecognized base_resp status code must fall back to the generic
// ProviderBizError type while still carrying MiniMax's status message.
it('should handle MiniMax base_resp error with fallback to ProviderBizError', async () => {
  const unknownErrorChunk = {
    id: 'minimax-error-unknown',
    choices: null,
    base_resp: { status_code: 9999, status_msg: 'unknown error' },
  };
  const rawStream = new ReadableStream({
    start(controller) {
      controller.enqueue(unknownErrorChunk);
      controller.close();
    },
  });
  const decoder = new TextDecoder();
  const frames = [];
  // @ts-ignore
  for await (const emitted of OpenAIStream(rawStream)) {
    frames.push(decoder.decode(emitted, { stream: true }));
  }
  expect(frames[0]).toBe('id: minimax-error-unknown\n');
  expect(frames[1]).toBe('event: error\n');
  expect(frames[2]).toContain('ProviderBizError');
  expect(frames[2]).toContain('unknown error');
});
});
});

View File

@@ -71,6 +71,55 @@ const transformOpenAIStream = (
return { data: errorData, id: 'first_chunk_error', type: 'error' };
}
// MiniMax 会在 base_resp 中返回业务错误(如余额不足),但不走 FIRST_CHUNK_ERROR_KEY
// 典型返回:{ id: '...', choices: null, base_resp: { status_code: 1008, status_msg: 'insufficient balance' }, usage: {...} }
if ((chunk as any).base_resp && typeof (chunk as any).base_resp.status_code === 'number') {
const baseResp = (chunk as any).base_resp as {
message?: string;
status_code: number;
status_msg?: string;
};
if (baseResp.status_code !== 0) {
// 根据 MiniMax 错误码映射到对应的错误类型
let errorType: ILobeAgentRuntimeErrorType = AgentRuntimeErrorType.ProviderBizError;
switch (baseResp.status_code) {
// 1004 - 未授权 / Token 不匹配 / 2049 - 无效的 API Key
case 1004:
case 2049: {
errorType = AgentRuntimeErrorType.InvalidProviderAPIKey;
break;
}
// 1008 - 余额不足
case 1008: {
errorType = AgentRuntimeErrorType.InsufficientQuota;
break;
}
// 1002 - 请求频率超限 / 1041 - 连接数限制 / 2045 - 请求频率增长超限
case 1002:
case 1041:
case 2045: {
errorType = AgentRuntimeErrorType.QuotaLimitReached;
break;
}
// 1039 - Token 限制
case 1039: {
errorType = AgentRuntimeErrorType.ExceededContextWindow;
break;
}
}
const errorData: ChatMessageError = {
body: { ...baseResp, provider: 'minimax' },
message: baseResp.status_msg || baseResp.message || 'MiniMax provider error',
type: errorType,
};
return { data: errorData, id: chunk.id, type: 'error' };
}
}
try {
// maybe need another structure to add support for multiple choices
if (!Array.isArray(chunk.choices) || chunk.choices.length === 0) {
@@ -265,6 +314,24 @@ const transformOpenAIStream = (
let reasoning_content = (() => {
if ('reasoning_content' in item.delta) return item.delta.reasoning_content;
if ('reasoning' in item.delta) return item.delta.reasoning;
// Handle MiniMax M2 reasoning_details format (array of objects with text field)
if ('reasoning_details' in item.delta) {
const details = item.delta.reasoning_details;
if (Array.isArray(details)) {
return details
.filter((detail: any) => detail.text)
.map((detail: any) => detail.text)
.join('');
}
if (typeof details === 'string') {
return details;
}
if (typeof details === 'object' && details !== null && 'text' in details) {
return details.text;
}
// Fallback for unexpected types
return '';
}
// Handle content array format with thinking blocks (e.g. mistral AI Magistral model)
if ('content' in item.delta && Array.isArray(item.delta.content)) {
return item.delta.content

View File

@@ -13,17 +13,45 @@ export const LobeMinimaxAI = createOpenAICompatibleRuntime({
baseURL: 'https://api.minimaxi.com/v1',
chatCompletion: {
handlePayload: (payload) => {
const { enabledSearch, max_tokens, temperature, tools, top_p, ...params } = payload;
const { enabledSearch, max_tokens, messages, temperature, tools, top_p, ...params } = payload;
const minimaxTools = enabledSearch
? [
...(tools || []),
{
type: 'web_search',
},
]
...(tools || []),
{
type: 'web_search',
},
]
: tools;
// Interleaved thinking
const processedMessages = messages.map((message: any) => {
if (message.role === 'assistant' && message.reasoning) {
// 只处理没有 signature 的历史推理内容
if (!message.reasoning.signature && message.reasoning.content) {
const { reasoning, ...messageWithoutReasoning } = message;
return {
...messageWithoutReasoning,
reasoning_details: [
{
format: 'MiniMax-response-v1',
id: 'reasoning-text-0',
index: 0,
text: reasoning.content,
type: 'reasoning.text',
},
],
};
}
// 有 signature 或没有 content 的情况,移除 reasoning 字段
// eslint-disable-next-line unused-imports/no-unused-vars, @typescript-eslint/no-unused-vars
const { reasoning, ...messageWithoutReasoning } = message;
return messageWithoutReasoning;
}
return message;
});
// Resolve parameters with constraints
const resolvedParams = resolveParameters(
{
@@ -46,6 +74,8 @@ export const LobeMinimaxAI = createOpenAICompatibleRuntime({
return {
...params,
max_tokens: resolvedParams.max_tokens,
messages: processedMessages,
reasoning_split: true,
temperature: finalTemperature,
tools: minimaxTools,
top_p: resolvedParams.top_p,

View File

@@ -581,3 +581,587 @@ Designed for a wide variety of tasks, it empowers developers and researchers to
},
]
`;
exports[`NovitaAI models should get models 1`] = `
[
{
"contextWindowTokens": 8192,
"description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.",
"displayName": "meta-llama/llama-3-8b-instruct",
"enabled": false,
"functionCall": false,
"id": "meta-llama/llama-3-8b-instruct",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.063,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.063,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-25",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.",
"displayName": "meta-llama/llama-3-70b-instruct",
"enabled": false,
"functionCall": false,
"id": "meta-llama/llama-3-70b-instruct",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.55,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.76,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-25",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description": "Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. The 8B instruct-tuned version is particularly fast and efficient. It has demonstrated strong performance in human evaluations, outperforming several leading closed-source models.",
"displayName": "meta-llama/llama-3.1-8b-instruct",
"enabled": false,
"functionCall": false,
"id": "meta-llama/llama-3.1-8b-instruct",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.1,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.1,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-07-24",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description": "Meta's latest class of models, Llama 3.1, has launched with a variety of sizes and configurations. The 70B instruct-tuned version is optimized for high-quality dialogue use cases. It has demonstrated strong performance in human evaluations compared to leading closed-source models.",
"displayName": "meta-llama/llama-3.1-70b-instruct",
"enabled": false,
"functionCall": false,
"id": "meta-llama/llama-3.1-70b-instruct",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.55,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.76,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-07-24",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 32768,
"description": "Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. This 405B instruct-tuned version is optimized for high-quality dialogue use cases. It has demonstrated strong performance compared to leading closed-source models, including GPT-4o and Claude 3.5 Sonnet, in evaluations.",
"displayName": "meta-llama/llama-3.1-405b-instruct",
"enabled": false,
"functionCall": false,
"id": "meta-llama/llama-3.1-405b-instruct",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 3,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 5,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-07-24",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description":
"Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.
Designed for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness."
,
"displayName": "google/gemma-2-9b-it",
"enabled": false,
"functionCall": false,
"id": "google/gemma-2-9b-it",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.08,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.08,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-07-18",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 4096,
"description": "This is a fine-tuned Llama-2 model designed to support longer and more detailed writing prompts, as well as next-chapter generation. It also includes an experimental role-playing instruction set with multi-round dialogues, character interactions, and varying numbers of participants",
"displayName": "jondurbin/airoboros-l2-70b",
"enabled": false,
"functionCall": false,
"id": "jondurbin/airoboros-l2-70b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.5,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.5,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-07-17",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.",
"displayName": "nousresearch/hermes-2-pro-llama-3-8b",
"enabled": false,
"functionCall": false,
"id": "nousresearch/hermes-2-pro-llama-3-8b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.14,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.14,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-06-27",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 32768,
"description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.",
"displayName": "mistralai/mistral-7b-instruct",
"enabled": false,
"functionCall": false,
"id": "mistralai/mistral-7b-instruct",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.065,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.065,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-06-27",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 16000,
"description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of Mixtral 8x22B Instruct. It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.The model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use.",
"displayName": "cognitivecomputations/dolphin-mixtral-8x22b",
"enabled": false,
"functionCall": false,
"id": "cognitivecomputations/dolphin-mixtral-8x22b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.9,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.9,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-06-27",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 16000,
"description": "The uncensored llama3 model is a powerhouse of creativity, excelling in both roleplay and story writing. It offers a liberating experience during roleplays, free from any restrictions. This model stands out for its immense creativity, boasting a vast array of unique ideas and plots, truly a treasure trove for those seeking originality. Its unrestricted nature during roleplays allows for the full breadth of imagination to unfold, akin to an enhanced, big-brained version of Stheno. Perfect for creative minds seeking a boundless platform for their imaginative expressions, the uncensored llama3 model is an ideal choice",
"displayName": "sao10k/l3-70b-euryale-v2.1",
"enabled": false,
"functionCall": false,
"id": "sao10k/l3-70b-euryale-v2.1",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 1.48,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 1.48,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-06-18",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 4096,
"description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.",
"displayName": "sophosympatheia/midnight-rose-70b",
"enabled": false,
"functionCall": false,
"id": "sophosympatheia/midnight-rose-70b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.8,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.8,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-06-17",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 4096,
"description": "The idea behind this merge is that each layer is composed of several tensors, which are in turn responsible for specific functions. Using MythoLogic-L2's robust understanding as its input and Huginn's extensive writing capability as its output seems to have resulted in a model that exceeds at both, confirming my theory. (More details to be released at a later time).",
"displayName": "gryphe/mythomax-l2-13b",
"enabled": false,
"functionCall": false,
"id": "gryphe/mythomax-l2-13b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.119,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.119,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-25",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 4096,
"description": "Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions. This model was fine-tuned by Nous Research, with Teknium and Emozilla leading the fine tuning process and dataset curation, Redmond AI sponsoring the compute, and several other contributors.",
"displayName": "nousresearch/nous-hermes-llama2-13b",
"enabled": false,
"functionCall": false,
"id": "nousresearch/nous-hermes-llama2-13b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.17,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.17,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-25",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 32768,
"description": "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.",
"displayName": "Nous-Hermes-2-Mixtral-8x7B-DPO",
"enabled": false,
"functionCall": false,
"id": "Nous-Hermes-2-Mixtral-8x7B-DPO",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.27,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.27,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-25",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 4096,
"description": "A Mythomax/MLewd_13B-style merge of selected 70B models. A multi-model merge of several LLaMA2 70B finetunes for roleplaying and creative work. The goal was to create a model that combines creativity with intelligence for an enhanced experience.",
"displayName": "lzlv_70b",
"enabled": false,
"functionCall": false,
"id": "lzlv_70b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.58,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.78,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-25",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 4096,
"description": "OpenHermes 2.5 Mistral 7B is a state of the art Mistral Fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.",
"displayName": "teknium/openhermes-2.5-mistral-7b",
"enabled": false,
"functionCall": false,
"id": "teknium/openhermes-2.5-mistral-7b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.17,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.17,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-24",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 65535,
"description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.",
"displayName": "microsoft/wizardlm-2-8x22b",
"enabled": false,
"functionCall": false,
"id": "microsoft/wizardlm-2-8x22b",
"imageOutput": false,
"maxOutput": undefined,
"pricing": {
"units": [
{
"name": "textInput",
"rate": 0.64,
"strategy": "fixed",
"unit": "millionTokens",
},
{
"name": "textOutput",
"rate": 0.64,
"strategy": "fixed",
"unit": "millionTokens",
},
],
},
"reasoning": false,
"releasedAt": "2024-04-24",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
]
`;

View File

@@ -516,3 +516,520 @@ exports[`LobeOpenAI > models > should get models 1`] = `
},
]
`;
exports[`LobeOpenAI models should get models 1`] = `
[
{
"contextWindowTokens": undefined,
"description": "通用语音识别模型,支持多语言语音识别、语音翻译和语言识别。",
"displayName": "Whisper",
"enabled": false,
"functionCall": false,
"id": "whisper-1",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-02-27",
"search": false,
"type": "stt",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "davinci-002",
"enabled": false,
"functionCall": false,
"id": "davinci-002",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-08-21",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 16384,
"description": "GPT 3.5 Turbo适用于各种文本生成和理解任务Currently points to gpt-3.5-turbo-0125",
"displayName": "GPT-3.5 Turbo",
"enabled": false,
"functionCall": true,
"id": "gpt-3.5-turbo",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-02-28",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "第二代 DALL·E 模型支持更真实、准确的图像生成分辨率是第一代的4倍",
"displayName": "DALL·E 2",
"enabled": false,
"functionCall": false,
"id": "dall-e-2",
"imageOutput": false,
"maxOutput": undefined,
"parameters": {
"imageUrl": {
"default": null,
},
"prompt": {
"default": "",
},
"size": {
"default": "1024x1024",
"enum": [
"256x256",
"512x512",
"1024x1024",
],
},
},
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-11-01",
"search": false,
"type": "image",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "gpt-3.5-turbo-16k",
"enabled": false,
"functionCall": false,
"id": "gpt-3.5-turbo-16k",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-05-10",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "tts-1-hd-1106",
"enabled": false,
"functionCall": false,
"id": "tts-1-hd-1106",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-11-03",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "最新的文本转语音模型,针对质量进行优化",
"displayName": "TTS-1 HD",
"enabled": false,
"functionCall": false,
"id": "tts-1-hd",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-11-03",
"search": false,
"type": "tts",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "gpt-3.5-turbo-16k-0613",
"enabled": false,
"functionCall": false,
"id": "gpt-3.5-turbo-16k-0613",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-05-30",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description": "最强大的向量化模型,适用于英文和非英文任务",
"displayName": "Text Embedding 3 Large",
"enabled": false,
"functionCall": false,
"id": "text-embedding-3-large",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2024-01-22",
"search": false,
"type": "embedding",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "gpt-4-1106-vision-preview",
"enabled": false,
"functionCall": false,
"id": "gpt-4-1106-vision-preview",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2024-03-26",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "gpt-3.5-turbo-instruct-0914",
"enabled": false,
"functionCall": false,
"id": "gpt-3.5-turbo-instruct-0914",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-09-07",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 128000,
"description": "最新的 GPT-4 Turbo 模型具备视觉功能。现在,视觉请求可以使用 JSON 模式和函数调用。 GPT-4 Turbo 是一个增强版本,为多模态任务提供成本效益高的支持。它在准确性和效率之间找到平衡,适合需要进行实时交互的应用程序场景。",
"displayName": "GPT-4 Turbo Preview 0125",
"enabled": false,
"functionCall": true,
"id": "gpt-4-0125-preview",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2024-01-23",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 128000,
"description": "最新的 GPT-4 Turbo 模型具备视觉功能。现在,视觉请求可以使用 JSON 模式和函数调用。 GPT-4 Turbo 是一个增强版本,为多模态任务提供成本效益高的支持。它在准确性和效率之间找到平衡,适合需要进行实时交互的应用程序场景。",
"displayName": "GPT-4 Turbo Preview",
"enabled": false,
"functionCall": true,
"id": "gpt-4-turbo-preview",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2024-01-23",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 4096,
"description": "GPT 3.5 Turbo适用于各种文本生成和理解任务对指令遵循的优化",
"displayName": "GPT-3.5 Turbo Instruct",
"enabled": false,
"functionCall": false,
"id": "gpt-3.5-turbo-instruct",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-08-24",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "gpt-3.5-turbo-0301",
"enabled": false,
"functionCall": false,
"id": "gpt-3.5-turbo-0301",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-03-01",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "gpt-3.5-turbo-0613",
"enabled": false,
"functionCall": false,
"id": "gpt-3.5-turbo-0613",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-06-12",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "最新的文本转语音模型,针对实时场景优化速度",
"displayName": "TTS-1",
"enabled": false,
"functionCall": false,
"id": "tts-1",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-04-19",
"search": false,
"type": "tts",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "最新的 DALL·E 模型于2023年11月发布。支持更真实、准确的图像生成具有更强的细节表现力",
"displayName": "DALL·E 3",
"enabled": false,
"functionCall": false,
"id": "dall-e-3",
"imageOutput": false,
"maxOutput": undefined,
"parameters": {
"prompt": {
"default": "",
},
"quality": {
"default": "standard",
"enum": [
"standard",
"hd",
],
},
"size": {
"default": "1024x1024",
"enum": [
"1024x1024",
"1792x1024",
"1024x1792",
],
},
},
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-10-31",
"search": false,
"type": "image",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 16384,
"description": "GPT 3.5 Turbo适用于各种文本生成和理解任务Currently points to gpt-3.5-turbo-0125",
"displayName": "GPT-3.5 Turbo 1106",
"enabled": false,
"functionCall": true,
"id": "gpt-3.5-turbo-1106",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-11-02",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 128000,
"description": "最新的 GPT-4 Turbo 模型具备视觉功能。现在,视觉请求可以使用 JSON 模式和函数调用。 GPT-4 Turbo 是一个增强版本,为多模态任务提供成本效益高的支持。它在准确性和效率之间找到平衡,适合需要进行实时交互的应用程序场景。",
"displayName": "GPT-4 Turbo Preview 1106",
"enabled": false,
"functionCall": true,
"id": "gpt-4-1106-preview",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-11-02",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "babbage-002",
"enabled": false,
"functionCall": false,
"id": "babbage-002",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-08-21",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "tts-1-1106",
"enabled": false,
"functionCall": false,
"id": "tts-1-1106",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-11-03",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 128000,
"description": "GPT-4 视觉预览版,专为图像分析和处理任务设计。",
"displayName": "GPT 4 Turbo with Vision Preview",
"enabled": false,
"functionCall": false,
"id": "gpt-4-vision-preview",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-11-02",
"search": false,
"type": "chat",
"video": false,
"vision": true,
},
{
"contextWindowTokens": 8192,
"description": "高效且经济的新一代 Embedding 模型适用于知识检索、RAG 应用等场景",
"displayName": "Text Embedding 3 Small",
"enabled": false,
"functionCall": false,
"id": "text-embedding-3-small",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2024-01-22",
"search": false,
"type": "embedding",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description": "GPT-4 提供了一个更大的上下文窗口,能够处理更长的文本输入,适用于需要广泛信息整合和数据分析的场景。",
"displayName": "GPT-4",
"enabled": false,
"functionCall": true,
"id": "gpt-4",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-06-27",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": undefined,
"description": "",
"displayName": "text-embedding-ada-002",
"enabled": false,
"functionCall": false,
"id": "text-embedding-ada-002",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2022-12-16",
"search": false,
"type": "embedding",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 16384,
"description": "GPT 3.5 Turbo适用于各种文本生成和理解任务Currently points to gpt-3.5-turbo-0125",
"displayName": "GPT-3.5 Turbo 0125",
"enabled": false,
"functionCall": true,
"id": "gpt-3.5-turbo-0125",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2024-01-23",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
{
"contextWindowTokens": 8192,
"description": "GPT-4 提供了一个更大的上下文窗口,能够处理更长的文本输入,适用于需要广泛信息整合和数据分析的场景。",
"displayName": "GPT-4 0613",
"enabled": false,
"functionCall": true,
"id": "gpt-4-0613",
"imageOutput": false,
"maxOutput": undefined,
"pricing": undefined,
"reasoning": false,
"releasedAt": "2023-06-12",
"search": false,
"type": "chat",
"video": false,
"vision": false,
},
]
`;

View File

@@ -26,3 +26,30 @@ exports[`PPIO > models > should get models 1`] = `
},
]
`;
exports[`PPIO models should get models 1`] = `
[
{
"contextWindowTokens": 64000,
"description": "DeepSeek R1是DeepSeek团队发布的最新开源模型具备非常强悍的推理性能尤其在数学、编程和推理任务上达到了与OpenAI的o1模型相当的水平。",
"displayName": "DeepSeek: DeepSeek R1 (community)",
"enabled": true,
"functionCall": false,
"id": "deepseek/deepseek-r1/community",
"reasoning": true,
"type": "chat",
"vision": false,
},
{
"contextWindowTokens": 64000,
"description": "DeepSeek-V3在推理速度方面实现了比之前模型的重大突破。在开源模型中排名第一并可与全球最先进的闭源模型相媲美。DeepSeek-V3 采用了多头潜在注意力 MLA 和 DeepSeekMoE 架构,这些架构在 DeepSeek-V2 中得到了全面验证。此外DeepSeek-V3 开创了一种用于负载均衡的辅助无损策略,并设定了多标记预测训练目标以获得更强的性能。",
"displayName": "DeepSeek: DeepSeek V3 (community)",
"enabled": true,
"functionCall": false,
"id": "deepseek/deepseek-v3/community",
"reasoning": false,
"type": "chat",
"vision": false,
},
]
`;

View File

@@ -111,12 +111,12 @@ export default {
LocationNotSupportError:
'很抱歉,你的所在地区不支持此模型服务,可能是由于区域限制或服务未开通。请确认当前地区是否支持使用此服务,或尝试使用切换到其他地区后重试。',
InsufficientQuota:
'很抱歉,该密钥的配额(quota)已达上限,请检查账户余额是否充足,或增大密钥配额后再试',
'很抱歉,该密钥的配额 (quota) 已达上限,请检查账户余额是否充足,或增大密钥配额后再试',
ModelNotFound:
'很抱歉,无法请求到相应的模型,可能是模型不存在或者没有访问权限导致,请更换 API Key 或调整访问权限后重试',
ExceededContextWindow: '当前请求内容超出模型可处理的长度,请减少内容量后重试',
QuotaLimitReached:
'很抱歉,当前 Token 用量或请求次数已达该密钥的配额(quota)上限,请增加该密钥的配额或稍后再试',
'很抱歉,当前 Token 用量或请求次数已达该密钥的配额 (quota) 上限,请增加该密钥的配额或稍后再试',
PermissionDenied: '很抱歉,你没有权限访问该服务,请检查你的密钥是否有访问权限',
InvalidProviderAPIKey: '{{provider}} API Key 不正确或为空,请检查 {{provider}} API Key 后重试',
ProviderBizError: '请求 {{provider}} 服务出错,请根据以下信息排查或重试',