feat: add LongCat(美团) provider support (#12603)

* feat: add LongCat(美团) provider support

* chore: remove enable_thinking, due to not in doc anymore
This commit is contained in:
Zhijie He
2026-03-09 16:59:29 +08:00
committed by GitHub
parent 50dbc653fa
commit 8d4d657a5d
13 changed files with 162 additions and 0 deletions

View File

@@ -31,6 +31,7 @@
"jina.description": "Founded in 2020, Jina AI is a leading search AI company. Its search stack includes vector models, rerankers, and small language models to build reliable, high-quality generative and multimodal search apps.",
"lmstudio.description": "LM Studio is a desktop app for developing and experimenting with LLMs on your computer.",
"lobehub.description": "LobeHub Cloud uses official APIs to access AI models and measures usage with Credits tied to model tokens.",
"longcat.description": "LongCat is a series of generative AI large models independently developed by Meituan. It is designed to enhance internal enterprise productivity and enable innovative applications through an efficient computational architecture and strong multimodal capabilities.",
"minimax.description": "Founded in 2021, MiniMax builds general-purpose AI with multimodal foundation models, including trillion-parameter MoE text models, speech models, and vision models, along with apps like Hailuo AI.",
"mistral.description": "Mistral offers advanced general, specialized, and research models for complex reasoning, multilingual tasks, and code generation, with function-calling for custom integrations.",
"modelscope.description": "ModelScope is Alibaba Clouds model-as-a-service platform, offering a wide range of AI models and inference services.",

View File

@@ -31,6 +31,7 @@
"jina.description": "Jina AI 成立于 2020 年,是领先的搜索 AI 公司,其搜索技术栈包括向量模型、重排序器与小型语言模型,支持构建高质量的生成式与多模态搜索应用。",
"lmstudio.description": "LM Studio 是一款桌面应用,支持在本地开发与实验大语言模型。",
"lobehub.description": "LobeHub Cloud 使用官方 API 访问 AI 模型,并通过与模型令牌相关的积分来衡量使用情况。",
"longcat.description": "LongCat 是由美团自主研发的生成式 AI 大模型系列,旨在通过高效的计算架构和强大的多模态能力,提升企业内部工作效率并推动创新应用的发展。",
"minimax.description": "MiniMax 成立于 2021 年,致力于构建通用 AI拥有多模态基础模型包括万亿参数的 MoE 文本模型、语音模型与视觉模型,并推出海螺 AI 等应用。",
"mistral.description": "Mistral 提供先进的通用、专业与研究型模型,支持复杂推理、多语言任务与代码生成,具备函数调用能力以实现定制集成。",
"modelscope.description": "ModelScope 是阿里云的模型即服务平台,提供丰富的 AI 模型与推理服务。",

View File

@@ -38,6 +38,7 @@
"./internlm": "./src/aiModels/internlm.ts",
"./jina": "./src/aiModels/jina.ts",
"./lmstudio": "./src/aiModels/lmstudio.ts",
"./longcat": "./src/aiModels/longcat.ts",
"./minimax": "./src/aiModels/minimax.ts",
"./mistral": "./src/aiModels/mistral.ts",
"./modelscope": "./src/aiModels/modelscope.ts",

View File

@@ -33,6 +33,7 @@ import { default as internlm } from './internlm';
import { default as jina } from './jina';
import { default as lmstudio } from './lmstudio';
import { default as lobehub } from './lobehub/index';
import { default as longcat } from './longcat';
import { default as minimax } from './minimax';
import { default as mistral } from './mistral';
import { default as modelscope } from './modelscope';
@@ -125,6 +126,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({
internlm,
jina,
lmstudio,
longcat,
...(ENABLE_BUSINESS_FEATURES ? { lobehub } : {}),
minimax,
mistral,
@@ -200,6 +202,7 @@ export { default as internlm } from './internlm';
export { default as jina } from './jina';
export { default as lmstudio } from './lmstudio';
export { default as lobehub } from './lobehub/index';
export { default as longcat } from './longcat';
export { default as minimax } from './minimax';
export { default as mistral } from './mistral';
export { default as modelscope } from './modelscope';

View File

@@ -0,0 +1,85 @@
import type { AIChatModelCard } from '../types/aiModel';

/**
 * Model cards for LongCat, Meituan's generative AI model family, served
 * through an OpenAI-compatible API (https://api.longcat.chat/openai/v1).
 *
 * All LongCat models are currently free of charge, so every card shares the
 * same zero-rate pricing table below instead of repeating it per model.
 */
const freePricing: AIChatModelCard['pricing'] = {
  units: [
    { name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
    { name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
  ],
};

const longcatModels: AIChatModelCard[] = [
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 327_680,
    description:
      'The LongCat-Flash-Lite model has been officially released. It adopts an efficient Mixture-of-Experts (MoE) architecture, with 68.5 billion total parameters and approximately 3 billion activated parameters. Through the use of an N-gram embedding table, it achieves highly efficient parameter utilization, and it is deeply optimized for inference efficiency and specific application scenarios. Compared to models of a similar scale, its core features are as follows: Outstanding Inference Efficiency: By leveraging the N-gram embedding table to fundamentally alleviate the I/O bottleneck inherent in MoE architectures, combined with dedicated caching mechanisms and kernel-level optimizations, it significantly reduces inference latency and improves overall efficiency. Strong Agent and Code Performance: It demonstrates highly competitive capabilities in tool invocation and software development tasks, delivering exceptional performance relative to its model size.',
    displayName: 'LongCat-Flash-Lite',
    enabled: true,
    id: 'LongCat-Flash-Lite',
    pricing: freePricing,
    releasedAt: '2026-02-05',
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
      reasoning: true,
    },
    contextWindowTokens: 262_144,
    description:
      'The LongCat-Flash-Thinking-2601 model has been officially released. As an upgraded reasoning model built on a Mixture-of-Experts (MoE) architecture, it features a total of 560 billion parameters. While maintaining strong competitiveness across traditional reasoning benchmarks, it systematically enhances Agent-level reasoning capabilities through large-scale multi-environment reinforcement learning. Compared to the LongCat-Flash-Thinking model, the key upgrades are as follows: Extreme Robustness in Noisy Environments: Through systematic curriculum-style training targeting noise and uncertainty in real-world settings, the model demonstrates outstanding performance in Agent tool invocation, Agent-based search, and tool-integrated reasoning, with significantly improved generalization. Powerful Agent Capabilities: By constructing a tightly coupled dependency graph encompassing more than 60 tools, and scaling training through multi-environment expansion and large-scale exploratory learning, the model markedly improves its ability to generalize to complex and out-of-distribution real-world scenarios. Advanced Deep Thinking Mode: It expands the breadth of reasoning via parallel inference and deepens analytical capability through recursive feedback-driven summarization and abstraction mechanisms, effectively addressing highly challenging problems.',
    displayName: 'LongCat-Flash-Thinking-2601',
    enabled: true,
    id: 'LongCat-Flash-Thinking-2601',
    pricing: freePricing,
    releasedAt: '2026-01-14',
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
      reasoning: true,
    },
    contextWindowTokens: 262_144,
    description:
      'LongCat-Flash-Thinking has been officially released and open-sourced simultaneously. It is a deep reasoning model that can be used for free conversations within LongCat Chat, or accessed via API by specifying model=LongCat-Flash-Thinking.',
    displayName: 'LongCat-Flash-Thinking',
    // NOTE(review): superseded by LongCat-Flash-Thinking-2601 above, so not
    // enabled by default (no `enabled: true`).
    id: 'LongCat-Flash-Thinking',
    pricing: freePricing,
    releasedAt: '2025-09-22',
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 262_144,
    description:
      'The LongCat-Flash-Chat model has been upgraded to a new version. This update involves enhancements to model capabilities only; the model name and API invocation method remain unchanged. Building upon its hallmark “extreme efficiency” and “lightning-fast response,” the new version further strengthens contextual understanding and real-world programming performance: Significantly Enhanced Coding Capabilities: Deeply optimized for developer-centric scenarios, the model delivers substantial improvements in code generation, debugging, and explanation tasks. Developers are strongly encouraged to evaluate and benchmark these enhancements. Support for 256K Ultra-Long Context: The context window has doubled from the previous generation (128K) to 256K, enabling efficient processing of massive documents and long-sequence tasks. Comprehensively Improved Multilingual Performance: Provides strong support for nine languages, including Spanish, French, Arabic, Portuguese, Russian, and Indonesian. More Powerful Agent Capabilities: Demonstrates greater robustness and efficiency in complex tool invocation and multi-step task execution.',
    displayName: 'LongCat-Flash-Chat',
    enabled: true,
    id: 'LongCat-Flash-Chat',
    pricing: freePricing,
    releasedAt: '2025-12-12',
    type: 'chat',
  },
];

export const allModels = [...longcatModels];

export default allModels;

View File

@@ -31,6 +31,7 @@ export enum ModelProvider {
Jina = 'jina',
LMStudio = 'lmstudio',
LobeHub = 'lobehub',
LongCat = 'longcat',
Minimax = 'minimax',
Mistral = 'mistral',
ModelScope = 'modelscope',

View File

@@ -34,6 +34,7 @@ import InternLMProvider from './internlm';
import JinaProvider from './jina';
import LMStudioProvider from './lmstudio';
import LobeHubProvider from './lobehub';
import LongCatProvider from './longcat';
import MinimaxProvider from './minimax';
import MistralProvider from './mistral';
import ModelScopeProvider from './modelscope';
@@ -204,6 +205,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [
ZenMuxProvider,
StraicoProvider,
XiaomiMiMoProvider,
LongCatProvider,
];
export const filterEnabledModels = (provider: ModelProviderCard) => {
@@ -249,6 +251,7 @@ export { default as InternLMProviderCard } from './internlm';
export { default as JinaProviderCard } from './jina';
export { default as LMStudioProviderCard } from './lmstudio';
export { default as LobeHubProviderCard } from './lobehub';
export { default as LongCatProviderCard } from './longcat';
export { default as MinimaxProviderCard } from './minimax';
export { default as MistralProviderCard } from './mistral';
export { default as ModelScopeProviderCard } from './modelscope';

View File

@@ -0,0 +1,22 @@
import { type ModelProviderCard } from '@/types/llm';

// Provider card for LongCat, Meituan's generative AI model family.
// Model definitions live in model-bank (src/aiModels/longcat.ts); the
// legacy `chatModels` list here is intentionally empty.
const LongCat: ModelProviderCard = {
  chatModels: [],
  // Model used when probing the provider to validate connectivity / API key.
  checkModel: 'LongCat-Flash-Lite',
  description:
    'LongCat is a series of generative AI large models independently developed by Meituan. It is designed to enhance internal enterprise productivity and enable innovative applications through an efficient computational architecture and strong multimodal capabilities.',
  id: 'longcat',
  // Official docs page listing the supported models (Chinese-language anchor).
  modelsUrl: 'https://longcat.chat/platform/docs/zh/#%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B',
  name: 'LongCat',
  settings: {
    // Browser-side requests are allowed (not forced through the server proxy).
    disableBrowserRequest: false,
    proxyUrl: {
      // Default OpenAI-compatible endpoint shown as the input placeholder.
      placeholder: 'https://api.longcat.chat/openai/v1',
    },
    // LongCat exposes an OpenAI-compatible API, so the generic OpenAI SDK
    // wiring is reused.
    sdkType: 'openai',
    // Model-fetcher UI is hidden — presumably the API has no model-list
    // endpoint; TODO confirm against the LongCat docs.
    showModelFetcher: false,
  },
  url: 'https://longcat.chat/platform/usage',
};

export default LongCat;

View File

@@ -19,6 +19,7 @@ export { LobeDeepSeekAI } from './providers/deepseek';
export { LobeGoogleAI } from './providers/google';
export { LobeGroq } from './providers/groq';
export { LobeHubAI } from './providers/lobehub';
export { LobeLongCatAI } from './providers/longcat';
export { LobeMinimaxAI } from './providers/minimax';
export { LobeMistralAI } from './providers/mistral';
export { LobeMoonshotAI } from './providers/moonshot';

View File

@@ -0,0 +1,13 @@
// @vitest-environment node
import { ModelProvider } from 'model-bank';

import { testProvider } from '../../providerTestUtils';
import { LobeLongCatAI } from './index';

// Runs the shared provider test suite against the LongCat runtime, checking
// the default base URL, the debug env-var switch, and chat completion using
// the given model id.
testProvider({
  Runtime: LobeLongCatAI,
  provider: ModelProvider.LongCat,
  defaultBaseURL: 'https://api.longcat.chat/openai/v1',
  chatDebugEnv: 'DEBUG_LONGCAT_CHAT_COMPLETION',
  chatModel: 'LongCat-Flash-Lite',
});

View File

@@ -0,0 +1,23 @@
import { ModelProvider } from 'model-bank';

import { createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory';

/**
 * OpenAI-compatible runtime for LongCat (Meituan).
 *
 * Payload handling: `frequency_penalty` and `presence_penalty` are stripped
 * before forwarding — presumably unsupported by the LongCat API; confirm
 * against its docs. Destructuring already removes both keys from `rest`, so
 * the original's extra `frequency_penalty: undefined` / `presence_penalty:
 * undefined` re-assignments were dead code and have been dropped (JSON
 * serialization omits `undefined` values either way). Responses are always
 * requested as a stream.
 */
export const LobeLongCatAI = createOpenAICompatibleRuntime({
  baseURL: 'https://api.longcat.chat/openai/v1',
  chatCompletion: {
    handlePayload: (payload) => {
      // Underscore names mark the intentionally discarded parameters.
      const { frequency_penalty: _freq, presence_penalty: _pres, ...rest } = payload;
      return {
        ...rest,
        stream: true,
        // Cast retained from the original: the narrowed payload no longer
        // matches the factory's full payload type.
      } as any;
    },
  },
  debug: {
    // Enable request/response debug logging via DEBUG_LONGCAT_CHAT_COMPLETION=1.
    chatCompletion: () => process.env.DEBUG_LONGCAT_CHAT_COMPLETION === '1',
  },
  provider: ModelProvider.LongCat,
});

View File

@@ -30,6 +30,7 @@ import { LobeInternLMAI } from './providers/internlm';
import { LobeJinaAI } from './providers/jina';
import { LobeLMStudioAI } from './providers/lmstudio';
import { LobeHubAI } from './providers/lobehub';
import { LobeLongCatAI } from './providers/longcat';
import { LobeMinimaxAI } from './providers/minimax';
import { LobeMistralAI } from './providers/mistral';
import { LobeModelScopeAI } from './providers/modelscope';
@@ -103,6 +104,7 @@ export const providerRuntimeMap = {
jina: LobeJinaAI,
lmstudio: LobeLMStudioAI,
lobehub: LobeHubAI,
longcat: LobeLongCatAI,
minimax: LobeMinimaxAI,
mistral: LobeMistralAI,
modelscope: LobeModelScopeAI,

View File

@@ -221,6 +221,9 @@ export const getLLMConfig = () => {
ENABLED_XIAOMIMIMO: z.boolean(),
XIAOMIMIMO_API_KEY: z.string().optional(),
ENABLED_LONGCAT: z.boolean(),
LONGCAT_API_KEY: z.string().optional(),
},
runtimeEnv: {
API_KEY_SELECT_MODE: process.env.API_KEY_SELECT_MODE,
@@ -441,6 +444,9 @@ export const getLLMConfig = () => {
ENABLED_XIAOMIMIMO: !!process.env.XIAOMIMIMO_API_KEY,
XIAOMIMIMO_API_KEY: process.env.XIAOMIMIMO_API_KEY,
ENABLED_LONGCAT: !!process.env.LONGCAT_API_KEY,
LONGCAT_API_KEY: process.env.LONGCAT_API_KEY,
},
});
};