♻️ refactor(model-runtime): extract Anthropic factory and convert Moonshot to RouterRuntime (#12109)

* ♻️ refactor(model-runtime): extract Anthropic provider into reusable factory

- Create `anthropicCompatibleFactory` for shared Anthropic-compatible logic
- Migrate Anthropic provider to use the new factory
- Migrate Moonshot provider from OpenAI-compatible to Anthropic-compatible API
- Move shared utilities (resolveCacheTTL, resolveMaxTokens, etc.) to factory

* ♻️ refactor(model-runtime): convert Moonshot to RouterRuntime with auto API format detection

- Add baseURLPattern support to RouterRuntime for URL-based routing
- Moonshot now auto-selects OpenAI or Anthropic format based on baseURL
  - /anthropic suffix -> Anthropic format (with kimi-k2.5 thinking)
  - /v1 or default -> OpenAI format
- Remove moonshot from baseRuntimeMap to avoid circular dependency
- Update model-bank config: default to OpenAI format with api.moonshot.ai/v1
- Export CreateRouterRuntimeOptions type from RouterRuntime
- Fix type annotation in Anthropic test

* feat(model-bank): add Kimi K2.5 to LobeHub provider

- Add Kimi K2.5 model with multimodal capabilities
- Supports vision, reasoning, function calling, structured output, and search
- Context window: 262K tokens, max output: 32K tokens

* 🐛 fix(model-runtime): address PR review feedback

- Restore forceImageBase64 for Moonshot OpenAI runtime to fix vision requests
- Simplify baseURLPattern to only support RegExp (remove string support)
- Add baseURLPattern matching tests for RouterRuntime
This commit is contained in:
YuTengjing
2026-02-04 21:51:15 +08:00
committed by GitHub
parent 20928ac466
commit 71064fdede
17 changed files with 1476 additions and 871 deletions

View File

@@ -1,6 +1,35 @@
import { AIChatModelCard } from '../../../types/aiModel';
export const moonshotChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
reasoning: true,
search: true,
structuredOutput: true,
vision: true,
},
contextWindowTokens: 262_144,
description:
'Kimi K2.5 is Kimi\'s most versatile model to date, featuring a native multimodal architecture that supports both vision and text inputs, "thinking" and "non-thinking" modes, and both conversational and agent tasks.',
displayName: 'Kimi K2.5',
enabled: true,
id: 'kimi-k2.5',
maxOutput: 32_768,
pricing: {
units: [
{ name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
{ name: 'textOutput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
],
},
releasedAt: '2026-01-27',
settings: {
extendParams: ['enableReasoning'],
searchImpl: 'params',
},
type: 'chat',
},
{
abilities: {
functionCall: true,

View File

@@ -1,6 +1,5 @@
import { type ModelProviderCard } from '@/types/llm';
// ref: https://platform.moonshot.cn/docs/intro#model-list
const Moonshot: ModelProviderCard = {
chatModels: [],
checkModel: 'kimi-latest',
@@ -8,12 +7,12 @@ const Moonshot: ModelProviderCard = {
'Moonshot, from Moonshot AI (Beijing Moonshot Technology), offers multiple NLP models for use cases like content creation, research, recommendations, and medical analysis, with strong long-context and complex generation support.',
id: 'moonshot',
modelList: { showModelFetcher: true },
modelsUrl: 'https://platform.moonshot.cn/docs/intro',
modelsUrl: 'https://platform.moonshot.ai/docs/pricing/chat',
name: 'Moonshot',
settings: {
disableBrowserRequest: true, // CORS error
proxyUrl: {
placeholder: 'https://api.moonshot.cn/v1',
placeholder: 'https://api.moonshot.ai/v1',
},
responseAnimation: {
speed: 2,
@@ -22,7 +21,7 @@ const Moonshot: ModelProviderCard = {
sdkType: 'openai',
showModelFetcher: true,
},
url: 'https://www.moonshot.cn',
url: 'https://www.moonshot.ai/',
};
export default Moonshot;

View File

@@ -6,7 +6,6 @@ import { LobeDeepSeekAI } from '../../providers/deepseek';
import { LobeFalAI } from '../../providers/fal';
import { LobeGoogleAI } from '../../providers/google';
import { LobeMinimaxAI } from '../../providers/minimax';
import { LobeMoonshotAI } from '../../providers/moonshot';
import { LobeOpenAI } from '../../providers/openai';
import { LobeQwenAI } from '../../providers/qwen';
import { LobeVertexAI } from '../../providers/vertexai';
@@ -21,7 +20,6 @@ export const baseRuntimeMap = {
fal: LobeFalAI,
google: LobeGoogleAI,
minimax: LobeMinimaxAI,
moonshot: LobeMoonshotAI,
openai: LobeOpenAI,
qwen: LobeQwenAI,
vertexai: LobeVertexAI,

View File

@@ -452,6 +452,97 @@ describe('createRouterRuntime', () => {
});
describe('router matching', () => {
describe('baseURLPattern matching', () => {
it('should match router by baseURLPattern (RegExp)', async () => {
const mockChatOpenAI = vi.fn().mockResolvedValue('openai-response');
const mockChatAnthropic = vi.fn().mockResolvedValue('anthropic-response');
class OpenAIRuntime implements LobeRuntimeAI {
chat = mockChatOpenAI;
}
class AnthropicRuntime implements LobeRuntimeAI {
chat = mockChatAnthropic;
}
const Runtime = createRouterRuntime({
id: 'test-runtime',
routers: [
{
apiType: 'anthropic',
baseURLPattern: /\/anthropic\/?$/,
options: { apiKey: 'anthropic-key' },
runtime: AnthropicRuntime as any,
},
{
apiType: 'openai',
options: { apiKey: 'openai-key' },
runtime: OpenAIRuntime as any,
},
],
});
const runtime = new Runtime({
apiKey: 'test',
baseURL: 'https://api.example.com/anthropic',
});
const result = await runtime.chat({
model: 'test-model',
messages: [],
temperature: 0.7,
});
expect(result).toBe('anthropic-response');
expect(mockChatAnthropic).toHaveBeenCalled();
expect(mockChatOpenAI).not.toHaveBeenCalled();
});
it('should prioritize baseURLPattern over models matching', async () => {
const mockChatOpenAI = vi.fn().mockResolvedValue('openai-response');
const mockChatAnthropic = vi.fn().mockResolvedValue('anthropic-response');
class OpenAIRuntime implements LobeRuntimeAI {
chat = mockChatOpenAI;
}
class AnthropicRuntime implements LobeRuntimeAI {
chat = mockChatAnthropic;
}
const Runtime = createRouterRuntime({
id: 'test-runtime',
routers: [
{
apiType: 'anthropic',
baseURLPattern: /\/anthropic\/?$/,
options: { apiKey: 'anthropic-key' },
runtime: AnthropicRuntime as any,
models: ['claude-3'],
},
{
apiType: 'openai',
options: { apiKey: 'openai-key' },
runtime: OpenAIRuntime as any,
models: ['gpt-4', 'test-model'], // includes test-model
},
],
});
// Even though 'test-model' matches OpenAI router, baseURLPattern should win
const runtime = new Runtime({
apiKey: 'test',
baseURL: 'https://api.example.com/anthropic',
});
const result = await runtime.chat({
model: 'test-model',
messages: [],
temperature: 0.7,
});
expect(result).toBe('anthropic-response');
});
});
it('should fallback to last router when model does not match any', async () => {
const mockChatFirst = vi.fn().mockResolvedValue('first-response');
const mockChatLast = vi.fn().mockResolvedValue('last-response');

View File

@@ -58,10 +58,11 @@ interface RouterOptionItem extends ProviderIniOptions {
type RouterOptions = RouterOptionItem | RouterOptionItem[];
export type RuntimeClass = typeof LobeOpenAI;
export type RuntimeClass = new (options?: any) => LobeRuntimeAI;
interface RouterInstance {
apiType: keyof typeof baseRuntimeMap;
baseURLPattern?: RegExp;
models?: string[];
options: RouterOptions;
runtime?: RuntimeClass;
@@ -177,14 +178,25 @@ export const createRouterRuntime = ({
private async resolveMatchedRouter(model: string): Promise<RouterInstance> {
const resolvedRouters = await this.resolveRouters(model);
return (
resolvedRouters.find((router) => {
if (router.models && router.models.length > 0) {
return router.models.includes(model);
}
return false;
}) ?? resolvedRouters.at(-1)!
);
const baseURL = this._options.baseURL;
// Priority 1: Match by baseURLPattern (RegExp only)
if (baseURL) {
const baseURLMatch = resolvedRouters.find((router) => router.baseURLPattern?.test(baseURL));
if (baseURLMatch) return baseURLMatch;
}
// Priority 2: Match by models
const modelMatch = resolvedRouters.find((router) => {
if (router.models && router.models.length > 0) {
return router.models.includes(model);
}
return false;
});
if (modelMatch) return modelMatch;
// Fallback: Use the last router
return resolvedRouters.at(-1)!;
}
private normalizeRouterOptions(router: RouterInstance): RouterOptionItem[] {

View File

@@ -6,5 +6,5 @@ export interface RuntimeItem {
runtime: LobeRuntimeAI;
}
export type { UniformRuntime } from './createRuntime';
export type { CreateRouterRuntimeOptions, UniformRuntime } from './createRuntime';
export { createRouterRuntime } from './createRuntime';

View File

@@ -1,7 +1,7 @@
import type Anthropic from '@anthropic-ai/sdk';
import debug from 'debug';
import { buildAnthropicMessages, buildAnthropicTools } from '../../core/contextBuilders/anthropic';
import { buildAnthropicMessages, buildAnthropicTools } from '../contextBuilders/anthropic';
import { GenerateObjectOptions, GenerateObjectPayload } from '../../types';
const log = debug('lobe-model-runtime:anthropic:generate-object');

View File

@@ -0,0 +1,626 @@
import Anthropic, { ClientOptions } from '@anthropic-ai/sdk';
import type { Stream } from '@anthropic-ai/sdk/streaming';
import type { ChatModelCard } from '@lobechat/types';
import debug from 'debug';
import { hasTemperatureTopPConflict } from '../../const/models';
import {
buildAnthropicMessages,
buildAnthropicTools,
buildSearchTool,
} from '../contextBuilders/anthropic';
import { resolveParameters } from '../parameterResolver';
import {
ChatCompletionErrorPayload,
ChatMethodOptions,
ChatStreamCallbacks,
ChatStreamPayload,
GenerateObjectOptions,
GenerateObjectPayload,
} from '../../types';
import { AgentRuntimeErrorType, ILobeAgentRuntimeErrorType } from '../../types/error';
import { AgentRuntimeError } from '../../utils/createError';
import { debugStream } from '../../utils/debugStream';
import { desensitizeUrl } from '../../utils/desensitizeUrl';
import { getModelPricing } from '../../utils/getModelPricing';
import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
import { StreamingResponse } from '../../utils/response';
import { LobeRuntimeAI } from '../BaseAI';
import { AnthropicStream } from '../streams';
import type { ComputeChatCostOptions } from '../usageConverters/utils/computeChatCost';
import { createAnthropicGenerateObject } from './generateObject';
import { handleAnthropicError } from './handleAnthropicError';
import { resolveCacheTTL } from './resolveCacheTTL';
import { resolveMaxTokens } from './resolveMaxTokens';
/** Constructor options: Anthropic SDK client options merged with provider-specific extras. */
type ConstructorOptions<T extends Record<string, any> = any> = ClientOptions & T;

/** Tool union accepted by the Anthropic Messages API (regular tools + server-side web search). */
type AnthropicTools = Anthropic.Tool | Anthropic.WebSearchTool20250305;

export const DEFAULT_ANTHROPIC_BASE_URL = 'https://api.anthropic.com';

/** Lets a provider supply its own Anthropic SDK client factory (e.g. to inject extra headers). */
export interface CustomClientOptions<T extends Record<string, any> = any> {
  createClient?: (options: ConstructorOptions<T>) => Anthropic;
}

/** Full configuration accepted by `createAnthropicCompatibleRuntime`. */
export interface AnthropicCompatibleFactoryOptions<T extends Record<string, any> = any> {
  apiKey?: string;
  baseURL?: string;
  chatCompletion?: {
    /**
     * Resolve cache-aware pricing options used for usage-cost calculation;
     * return undefined to fall back to the provider's base pricing.
     */
    getPricingOptions?: (
      payload: ChatStreamPayload,
      anthropicPayload: Anthropic.MessageCreateParams,
    ) => Promise<ComputeChatCostOptions | undefined> | ComputeChatCostOptions | undefined;
    /** Map a thrown value to a chat error payload; return undefined to use the default mapping. */
    handleError?: (
      error: any,
      options: ConstructorOptions<T>,
    ) => Omit<ChatCompletionErrorPayload, 'provider'> | undefined;
    /**
     * Build an Anthropic Messages API payload from ChatStreamPayload.
     * This is required because Anthropic-compatible providers have different
     * parameter constraints than OpenAI-compatible ones.
     */
    handlePayload?: (
      payload: ChatStreamPayload,
      options: ConstructorOptions<T>,
    ) => Promise<Anthropic.MessageCreateParams> | Anthropic.MessageCreateParams;
    /** Transform the raw Anthropic event stream into the runtime's output ReadableStream. */
    handleStream?: (
      stream: Stream<Anthropic.MessageStreamEvent> | ReadableStream,
      {
        callbacks,
        inputStartAt,
        payload,
      }: { callbacks?: ChatStreamCallbacks; inputStartAt?: number; payload?: ChatStreamPayload },
    ) => ReadableStream;
  };
  constructorOptions?: ConstructorOptions<T>;
  customClient?: CustomClientOptions<T>;
  debug?: {
    chatCompletion?: () => boolean;
  };
  errorType?: {
    bizError: ILobeAgentRuntimeErrorType;
    invalidAPIKey: ILobeAgentRuntimeErrorType;
  };
  generateObject?: (
    client: Anthropic,
    payload: GenerateObjectPayload,
    options?: GenerateObjectOptions,
  ) => Promise<any>;
  models?: (params: {
    apiKey?: string;
    baseURL: string;
    client: Anthropic;
  }) => Promise<ChatModelCard[]>;
  provider: string;
}

/** Input accepted by `createAnthropicCompatibleParams`: same shape, but every hook is optional. */
export interface AnthropicCompatibleParamsInput<T extends Record<string, any> = any>
  extends Omit<
    AnthropicCompatibleFactoryOptions<T>,
    'chatCompletion' | 'customClient' | 'generateObject' | 'models'
  > {
  chatCompletion?: Partial<NonNullable<AnthropicCompatibleFactoryOptions<T>['chatCompletion']>>;
  customClient?: CustomClientOptions<T>;
  generateObject?: AnthropicCompatibleFactoryOptions<T>['generateObject'];
  models?: AnthropicCompatibleFactoryOptions<T>['models'];
}
/**
 * Build the default Anthropic Messages payload with LobeChat normalization.
 *
 * Handles: system-prompt extraction into the top-level `system` field,
 * context-caching markers, web-search tool injection, extended-thinking
 * budget clamping, and temperature/top_p resolution for models where the
 * two parameters conflict.
 */
export const buildDefaultAnthropicPayload = async (
  payload: ChatStreamPayload,
): Promise<Anthropic.MessageCreateParams> => {
  const {
    messages,
    model,
    max_tokens,
    temperature,
    top_p,
    tools,
    thinking,
    enabledContextCaching = true,
    enabledSearch,
  } = payload;

  // Lazy-load model-bank so this module stays importable without evaluating it eagerly.
  const { anthropic: anthropicModels } = await import('model-bank');

  const resolvedMaxTokens = await resolveMaxTokens({
    max_tokens,
    model,
    providerModels: anthropicModels,
    thinking,
  });

  // Anthropic takes the system prompt as a top-level field, not as a message.
  const systemMessage = messages.find((message) => message.role === 'system');
  const userMessages = messages.filter((message) => message.role !== 'system');

  const systemPrompts = systemMessage?.content
    ? ([
        {
          cache_control: enabledContextCaching ? { type: 'ephemeral' } : undefined,
          text: systemMessage.content as string,
          type: 'text',
        },
      ] as Anthropic.TextBlockParam[])
    : undefined;

  const postMessages = await buildAnthropicMessages(userMessages, { enabledContextCaching });

  let postTools = buildAnthropicTools(tools, { enabledContextCaching }) as
    | AnthropicTools[]
    | undefined;

  if (enabledSearch) {
    // Append the server-side web-search tool alongside any converted tools.
    const webSearchTool = buildSearchTool();
    postTools = postTools?.length ? [...postTools, webSearchTool] : [webSearchTool];
  }

  if (!!thinking && thinking.type === 'enabled') {
    // Extended-thinking branch: temperature/top_p are intentionally omitted here,
    // and the budget is clamped strictly below max_tokens (1024 fallback budget).
    return {
      max_tokens: resolvedMaxTokens,
      messages: postMessages,
      model,
      system: systemPrompts,
      thinking: {
        ...thinking,
        budget_tokens: thinking?.budget_tokens
          ? Math.min(thinking.budget_tokens, resolvedMaxTokens - 1)
          : 1024,
      },
      tools: postTools as Anthropic.MessageCreateParams['tools'],
    } satisfies Anthropic.MessageCreateParams;
  }

  // Some models reject temperature and top_p together; resolveParameters picks one
  // (preferring temperature) and normalizes the temperature range.
  const hasConflict = hasTemperatureTopPConflict(model);
  const resolvedParams = resolveParameters(
    { temperature, top_p },
    { hasConflict, normalizeTemperature: true, preferTemperature: true },
  );

  return {
    max_tokens: resolvedMaxTokens,
    messages: postMessages,
    model,
    system: systemPrompts,
    temperature: resolvedParams.temperature,
    tools: postTools as Anthropic.MessageCreateParams['tools'],
    top_p: resolvedParams.top_p,
  } satisfies Anthropic.MessageCreateParams;
};
/**
 * Resolve cache-aware pricing options for usage cost calculation.
 * Returns undefined when the request carries no cache TTL, so callers fall
 * back to the provider's base pricing.
 */
export const resolveDefaultAnthropicPricingOptions = (
  requestPayload: ChatStreamPayload,
  anthropicPayload: Anthropic.MessageCreateParams,
): ComputeChatCostOptions | undefined => {
  const { messages, system } = anthropicPayload;
  const ttl = resolveCacheTTL(requestPayload, { messages, system });

  return ttl ? { lookupParams: { ttl } } : undefined;
};
/**
 * Create an Anthropic SDK client, merging optional beta feature flags from the
 * ANTHROPIC_BETA_HEADERS environment variable into the default headers.
 */
export const createDefaultAnthropicClient = <T extends Record<string, any> = any>(
  options: ConstructorOptions<T>,
) => {
  const beta = process.env.ANTHROPIC_BETA_HEADERS;

  const defaultHeaders: Record<string, any> = { ...options.defaultHeaders };
  if (beta) defaultHeaders['anthropic-beta'] = beta;

  return new Anthropic({ ...options, defaultHeaders });
};
/**
 * Default Anthropic error handler with desensitized endpoint.
 *
 * Maps HTTP 401 to an invalid-API-key error and 403 to location-not-supported;
 * every other error is normalized via `handleAnthropicError` into a provider
 * biz error. The endpoint is masked unless it is the public default URL.
 */
export const handleDefaultAnthropicError = <T extends Record<string, any> = any>(
  error: any,
  options: ConstructorOptions<T>,
): Omit<ChatCompletionErrorPayload, 'provider'> => {
  const baseURL =
    typeof options.baseURL === 'string' && options.baseURL
      ? options.baseURL
      : DEFAULT_ANTHROPIC_BASE_URL;

  // Only mask custom endpoints; the public default URL is not sensitive.
  const desensitizedEndpoint =
    baseURL !== DEFAULT_ANTHROPIC_BASE_URL ? desensitizeUrl(baseURL) : baseURL;

  // Guard the `in` operator: it throws a TypeError when `error` is null or a
  // primitive, which would mask the original failure inside the error handler.
  if (error && typeof error === 'object' && 'status' in error) {
    switch ((error as Response).status) {
      case 401: {
        return {
          endpoint: desensitizedEndpoint,
          error: error as any,
          errorType: AgentRuntimeErrorType.InvalidProviderAPIKey,
        };
      }
      case 403: {
        return {
          endpoint: desensitizedEndpoint,
          error: error as any,
          errorType: AgentRuntimeErrorType.LocationNotSupportError,
        };
      }
      default: {
        break;
      }
    }
  }

  const { errorResult } = handleAnthropicError(error);

  return {
    endpoint: desensitizedEndpoint,
    error: errorResult,
    errorType: AgentRuntimeErrorType.ProviderBizError,
  };
};
/**
 * Default Anthropic models list fetcher.
 *
 * Calls the provider's `/v1/models` endpoint directly via fetch (the SDK
 * client parameter is accepted but unused) and normalizes the result through
 * `processModelList`.
 *
 * @throws when the API key is missing or the HTTP request fails.
 */
export const createDefaultAnthropicModels = async ({
  apiKey,
  baseURL,
}: {
  apiKey?: string;
  baseURL: string;
  client?: Anthropic;
}): Promise<ChatModelCard[]> => {
  if (!apiKey) {
    throw new Error('Missing Anthropic API key for model listing');
  }

  // Normalize trailing slashes so a user-supplied baseURL like
  // "https://host/" does not produce "https://host//v1/models".
  const endpoint = `${baseURL.replace(/\/+$/, '')}/v1/models`;

  const response = await fetch(endpoint, {
    headers: {
      'anthropic-version': '2023-06-01',
      'x-api-key': apiKey,
    },
    method: 'GET',
  });

  if (!response.ok) {
    throw new Error(`Failed to fetch Anthropic models: ${response.status} ${response.statusText}`);
  }

  const json = await response.json();
  const modelList = (json['data'] || []) as Array<{
    created_at: string;
    display_name: string;
    id: string;
  }>;

  const standardModelList = modelList.map((model) => ({
    created: model.created_at,
    displayName: model.display_name,
    id: model.id,
  }));

  return processModelList(standardModelList, MODEL_LIST_CONFIGS.anthropic, 'anthropic');
};
/**
 * Build provider params by merging overrides with Anthropic defaults.
 * Any hook the caller supplies wins over the corresponding default.
 */
export const createAnthropicCompatibleParams = <T extends Record<string, any> = any>(
  options: AnthropicCompatibleParamsInput<T>,
): AnthropicCompatibleFactoryOptions<T> => {
  const {
    baseURL = DEFAULT_ANTHROPIC_BASE_URL,
    chatCompletion,
    customClient,
    generateObject,
    models,
    ...rest
  } = options;

  // Caller-provided chat hooks override the defaults key-by-key.
  const mergedChatCompletion = {
    getPricingOptions: resolveDefaultAnthropicPricingOptions,
    handleError: handleDefaultAnthropicError,
    handlePayload: buildDefaultAnthropicPayload,
    ...chatCompletion,
  };

  return {
    ...rest,
    baseURL,
    chatCompletion: mergedChatCompletion,
    customClient: customClient ?? { createClient: createDefaultAnthropicClient },
    generateObject: generateObject ?? createAnthropicGenerateObject,
    models: models ?? createDefaultAnthropicModels,
  } as AnthropicCompatibleFactoryOptions<T>;
};
/**
 * Create an Anthropic-compatible runtime class for a given provider.
 *
 * The returned class implements LobeRuntimeAI: `chat` (streaming and
 * non-streaming), optional `generateObject`, optional `models`, and a shared
 * error normalizer. Provider-specific behavior is injected through the
 * factory options (payload builder, stream handler, error mapper, client
 * factory, error types).
 */
export const createAnthropicCompatibleRuntime = <T extends Record<string, any> = any>({
  provider,
  baseURL: DEFAULT_BASE_URL = DEFAULT_ANTHROPIC_BASE_URL,
  apiKey: DEFAULT_API_KEY,
  errorType,
  debug: debugParams,
  constructorOptions,
  chatCompletion,
  customClient,
  models,
  generateObject,
}: AnthropicCompatibleFactoryOptions<T>) => {
  // Provider-specific error types, falling back to the generic Anthropic ones.
  const ErrorType = {
    bizError: errorType?.bizError || AgentRuntimeErrorType.ProviderBizError,
    invalidAPIKey: errorType?.invalidAPIKey || AgentRuntimeErrorType.InvalidProviderAPIKey,
  };

  return class LobeAnthropicCompatibleAI implements LobeRuntimeAI {
    client!: Anthropic;
    private id: string;
    private logPrefix: string;
    baseURL!: string;
    protected _options: ConstructorOptions<T>;

    constructor(options: ClientOptions & Record<string, any> = {}) {
      // Trimmed caller values win; factory-level defaults fill the gaps.
      const resolvedOptions = {
        ...options,
        apiKey: options.apiKey?.trim() || DEFAULT_API_KEY,
        baseURL: options.baseURL?.trim() || DEFAULT_BASE_URL,
      };
      const { apiKey, baseURL = DEFAULT_BASE_URL, ...rest } = resolvedOptions;
      this._options = resolvedOptions as ConstructorOptions<T>;

      if (!apiKey) throw AgentRuntimeError.createError(ErrorType.invalidAPIKey);

      const initOptions = { apiKey, baseURL, ...constructorOptions, ...rest };

      // A provider may inject its own client factory (e.g. to add beta headers).
      if (customClient?.createClient) {
        this.client = customClient.createClient(initOptions as ConstructorOptions<T>);
      } else {
        this.client = new Anthropic(initOptions as ConstructorOptions<T>);
      }

      this.baseURL = baseURL || this.client.baseURL;
      // `options.id` lets a router runtime re-brand this instance per route.
      this.id = options.id || provider;
      this.logPrefix = `lobe-model-runtime:${this.id}`;
    }

    /**
     * Send a chat request through the Anthropic Messages API.
     * Streams by default; supports non-stream JSON mode and replays a
     * non-streaming message as a synthetic event stream so downstream
     * consumers use a single code path.
     */
    async chat(payload: ChatStreamPayload, options?: ChatMethodOptions) {
      try {
        if (!chatCompletion?.handlePayload) {
          throw new Error('Anthropic-compatible runtime requires chatCompletion.handlePayload');
        }
        const log = debug(`${this.logPrefix}:chat`);
        const inputStartAt = Date.now();

        log('chat called with model: %s, stream: %s', payload.model, payload.stream ?? true);

        const postPayload = await chatCompletion.handlePayload(payload, this._options);
        // Provider payload wins, then caller preference; streaming is the default.
        const shouldStream = postPayload.stream ?? payload.stream ?? true;
        const finalPayload = { ...postPayload, stream: shouldStream };

        if (debugParams?.chatCompletion?.()) {
          console.log('[requestPayload]');
          console.log(JSON.stringify(finalPayload), '\n');
        }

        const response = await this.client.messages.create(
          {
            ...finalPayload,
            metadata: options?.user ? { user_id: options.user } : undefined,
          },
          {
            headers: options?.requestHeaders,
            signal: options?.signal,
          },
        );

        const pricing = await getModelPricing(payload.model, this.id);
        const pricingOptions = await chatCompletion?.getPricingOptions?.(payload, postPayload);

        const streamOptions = {
          callbacks: options?.callback,
          payload: {
            model: payload.model,
            pricing,
            pricingOptions,
            provider: this.id,
          },
        };

        if (shouldStream) {
          const streamResponse = response as Stream<Anthropic.MessageStreamEvent>;
          // tee(): one branch for the client, one for debug logging.
          const [prod, useForDebug] = streamResponse.tee();

          if (debugParams?.chatCompletion?.()) {
            const useForDebugStream =
              useForDebug instanceof ReadableStream ? useForDebug : useForDebug.toReadableStream();

            debugStream(useForDebugStream).catch(console.error);
          }

          return StreamingResponse(
            chatCompletion?.handleStream
              ? chatCompletion.handleStream(prod, {
                  callbacks: streamOptions.callbacks,
                  inputStartAt,
                  payload,
                })
              : AnthropicStream(prod, { ...streamOptions, inputStartAt }),
            {
              headers: options?.headers,
            },
          );
        }

        if (payload.responseMode === 'json') {
          return Response.json(response);
        }

        // Non-streaming: replay the complete message as a synthetic event stream
        // (message_start -> per-block events -> message_delta -> message_stop).
        const stream = new ReadableStream<Anthropic.MessageStreamEvent>({
          start(controller) {
            const message = response as Anthropic.Message;

            controller.enqueue({
              message,
              type: 'message_start',
            } satisfies Anthropic.MessageStreamEvent);

            message.content?.forEach((block, index) => {
              if (block.type === 'tool_use' || block.type === 'server_tool_use') {
                controller.enqueue({
                  content_block: block,
                  index,
                  type: 'content_block_start',
                } satisfies Anthropic.MessageStreamEvent);

                controller.enqueue({
                  delta: { partial_json: JSON.stringify(block.input ?? {}), type: 'input_json_delta' },
                  index,
                  type: 'content_block_delta',
                } satisfies Anthropic.MessageStreamEvent);
              }

              if (block.type === 'thinking' || block.type === 'redacted_thinking') {
                controller.enqueue({
                  content_block: block,
                  index,
                  type: 'content_block_start',
                } satisfies Anthropic.MessageStreamEvent);
              }

              if (block.type === 'text') {
                controller.enqueue({
                  delta: { text: block.text, type: 'text_delta' },
                  index,
                  type: 'content_block_delta',
                } satisfies Anthropic.MessageStreamEvent);
              }
            });

            controller.enqueue({
              delta: {
                stop_reason: message.stop_reason,
                stop_sequence: message.stop_sequence ?? null,
              },
              type: 'message_delta',
              usage: {
                cache_creation_input_tokens: message.usage?.cache_creation_input_tokens ?? null,
                cache_read_input_tokens: message.usage?.cache_read_input_tokens ?? null,
                input_tokens: message.usage?.input_tokens ?? null,
                output_tokens: 0,
                server_tool_use: message.usage?.server_tool_use ?? null,
              },
            } satisfies Anthropic.MessageStreamEvent);

            controller.enqueue({ type: 'message_stop' } satisfies Anthropic.MessageStreamEvent);
            controller.close();
          },
        });

        return StreamingResponse(
          chatCompletion?.handleStream
            ? chatCompletion.handleStream(stream, {
                callbacks: streamOptions.callbacks,
                inputStartAt,
                payload,
              })
            : AnthropicStream(stream, {
                ...streamOptions,
                enableStreaming: false,
                inputStartAt,
              }),
          {
            headers: options?.headers,
          },
        );
      } catch (error) {
        throw this.handleError(error);
      }
    }

    /** Structured-output generation; only available when the factory supplied a hook. */
    async generateObject(payload: GenerateObjectPayload, options?: GenerateObjectOptions) {
      if (!generateObject) {
        throw new Error('GenerateObject is not supported by this provider');
      }
      try {
        return await generateObject(this.client, payload, options);
      } catch (error) {
        throw this.handleError(error);
      }
    }

    /** List available models; returns an empty list when no fetcher was configured. */
    async models() {
      if (!models) return [];

      return models({
        apiKey: this._options.apiKey ?? undefined,
        baseURL: this.baseURL,
        client: this.client,
      });
    }

    /**
     * Normalize any thrown value into a ChatCompletionErrorPayload.
     * Precedence: provider-specific handler -> HTTP status mapping -> generic
     * biz error with the SDK error envelope unwrapped.
     */
    protected handleError(error: any): ChatCompletionErrorPayload {
      const log = debug(`${this.logPrefix}:error`);
      log('handling error: %O', error);

      // Mask custom endpoints; the factory default URL is not sensitive.
      let desensitizedEndpoint = this.baseURL;
      if (this.baseURL !== DEFAULT_BASE_URL) {
        desensitizedEndpoint = desensitizeUrl(this.baseURL);
      }

      if (chatCompletion?.handleError) {
        const errorResult = chatCompletion.handleError(error, this._options);

        if (errorResult)
          return AgentRuntimeError.chat({
            ...errorResult,
            provider: this.id,
          } as ChatCompletionErrorPayload);
      }

      // NOTE(review): the `in` operator throws a TypeError when `error` is
      // null or a primitive — confirm callers always throw object errors.
      if ('status' in (error as any)) {
        switch ((error as Response).status) {
          case 401: {
            return AgentRuntimeError.chat({
              endpoint: desensitizedEndpoint,
              error: error as any,
              errorType: ErrorType.invalidAPIKey,
              provider: this.id,
            });
          }

          case 403: {
            return AgentRuntimeError.chat({
              endpoint: desensitizedEndpoint,
              error: error as any,
              errorType: AgentRuntimeErrorType.LocationNotSupportError,
              provider: this.id,
            });
          }
          default: {
            break;
          }
        }
      }

      // Unwrap the SDK error envelope: error.error.error -> error.error -> raw fields.
      const errorResult = (() => {
        if (error?.error) {
          const innerError = error.error;
          if ('error' in innerError) {
            return innerError.error;
          }
          return innerError;
        }

        return { headers: error?.headers, stack: error?.stack, status: error?.status };
      })();

      return AgentRuntimeError.chat({
        endpoint: desensitizedEndpoint,
        error: errorResult,
        errorType: ErrorType.bizError,
        provider: this.id,
      });
    }
  };
};

View File

@@ -1,6 +1,7 @@
// @vitest-environment node
import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { buildDefaultAnthropicPayload } from '../../core/anthropicCompatibleFactory';
import * as anthropicHelpers from '../../core/contextBuilders/anthropic';
import { ChatCompletionTool, ChatStreamPayload } from '../../types/chat';
import * as debugStreamModule from '../../utils/debugStream';
@@ -14,13 +15,13 @@ const invalidErrorType = 'InvalidProviderAPIKey';
// Mock the console.error to avoid polluting test output
vi.spyOn(console, 'error').mockImplementation(() => {});
let instance: LobeAnthropicAI;
let instance: InstanceType<typeof LobeAnthropicAI>;
beforeEach(() => {
instance = new LobeAnthropicAI({ apiKey: 'test' });
// 使用 vi.spyOn 来模拟 chat.completions.create 方法
vi.spyOn(instance['client'].messages, 'create').mockReturnValue(new ReadableStream() as any);
// Use vi.spyOn to mock the Anthropic messages.create call.
vi.spyOn(instance['client'].messages, 'create').mockResolvedValue(new ReadableStream() as any);
});
afterEach(() => {
@@ -87,7 +88,7 @@ describe('LobeAnthropicAI', () => {
// Assert
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
expect.objectContaining({
max_tokens: 4096,
messages: [
{
@@ -99,8 +100,8 @@ describe('LobeAnthropicAI', () => {
stream: true,
temperature: 0,
top_p: 1,
},
{},
}),
expect.objectContaining({}),
);
expect(result).toBeInstanceOf(Response);
});
@@ -128,7 +129,7 @@ describe('LobeAnthropicAI', () => {
// Assert
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
expect.objectContaining({
max_tokens: 64000,
messages: [
{
@@ -149,10 +150,8 @@ describe('LobeAnthropicAI', () => {
metadata: undefined,
tools: undefined,
top_p: undefined,
},
{
signal: undefined,
},
}),
expect.objectContaining({ signal: undefined }),
);
expect(result).toBeInstanceOf(Response);
});
@@ -179,7 +178,7 @@ describe('LobeAnthropicAI', () => {
// Assert
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
expect.objectContaining({
max_tokens: 2048,
messages: [
{
@@ -191,8 +190,8 @@ describe('LobeAnthropicAI', () => {
stream: true,
temperature: 0.25,
top_p: 1,
},
{},
}),
expect.objectContaining({}),
);
expect(result).toBeInstanceOf(Response);
});
@@ -221,7 +220,7 @@ describe('LobeAnthropicAI', () => {
// Assert
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
expect.objectContaining({
max_tokens: 2048,
messages: [
{
@@ -233,8 +232,8 @@ describe('LobeAnthropicAI', () => {
stream: true,
temperature: 0.25,
top_p: 1,
},
{},
}),
expect.objectContaining({}),
);
expect(result).toBeInstanceOf(Response);
});
@@ -321,20 +320,19 @@ describe('LobeAnthropicAI', () => {
enabledSearch: true,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(anthropicHelpers.buildAnthropicTools).toHaveBeenCalledWith(tools, {
enabledContextCaching: true,
});
// Should include both the converted tools and web search tool
expect(result.tools).toEqual([
...mockAnthropicTools,
{
name: 'web_search',
type: 'web_search_20250305',
},
]);
expect(result.tools).toEqual(
expect.arrayContaining([
expect.objectContaining({ name: 'tool1' }),
expect.objectContaining({ name: 'web_search', type: 'web_search_20250305' }),
]),
);
});
it('should build payload with web search enabled but no other tools', async () => {
@@ -347,19 +345,18 @@ describe('LobeAnthropicAI', () => {
enabledSearch: true,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(anthropicHelpers.buildAnthropicTools).toHaveBeenCalledWith(undefined, {
enabledContextCaching: true,
});
// Should only include web search tool
expect(result.tools).toEqual([
{
name: 'web_search',
type: 'web_search_20250305',
},
]);
expect(result.tools).toEqual(
expect.arrayContaining([
expect.objectContaining({ name: 'web_search', type: 'web_search_20250305' }),
]),
);
});
});
@@ -520,7 +517,7 @@ describe('LobeAnthropicAI', () => {
// Assert
expect(instance['client'].messages.create).toHaveBeenCalledWith(
expect.objectContaining({}),
{ signal: controller.signal },
expect.objectContaining({ signal: controller.signal }),
);
});
@@ -576,7 +573,7 @@ describe('LobeAnthropicAI', () => {
});
});
describe('buildAnthropicPayload', () => {
describe('buildDefaultAnthropicPayload', () => {
it('should correctly build payload with user messages only', async () => {
const payload: ChatStreamPayload = {
messages: [{ content: 'Hello', role: 'user' }],
@@ -584,19 +581,21 @@ describe('LobeAnthropicAI', () => {
temperature: 0.5,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result).toEqual({
max_tokens: 4096,
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
temperature: 0.25,
});
expect(result).toEqual(
expect.objectContaining({
max_tokens: 4096,
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
temperature: 0.25,
}),
);
});
it('should correctly build payload with system message', async () => {
@@ -609,26 +608,28 @@ describe('LobeAnthropicAI', () => {
temperature: 0.7,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result).toEqual({
max_tokens: 4096,
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
system: [
{
cache_control: { type: 'ephemeral' },
text: 'You are a helpful assistant',
type: 'text',
},
],
temperature: 0.35,
});
expect(result).toEqual(
expect.objectContaining({
max_tokens: 4096,
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
system: [
{
cache_control: { type: 'ephemeral' },
text: 'You are a helpful assistant',
type: 'text',
},
],
temperature: 0.35,
}),
);
});
it('should correctly build payload with tools', async () => {
@@ -650,20 +651,24 @@ describe('LobeAnthropicAI', () => {
tools,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result).toEqual({
max_tokens: 4096,
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Use a tool', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
temperature: 0.4,
tools: [{ name: 'tool1', description: 'desc1' }],
});
expect(result).toEqual(
expect.objectContaining({
max_tokens: 4096,
messages: [
{
content: [
{ cache_control: { type: 'ephemeral' }, text: 'Use a tool', type: 'text' },
],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
temperature: 0.4,
tools: [{ name: 'tool1', description: 'desc1' }],
}),
);
expect(spyOn).toHaveBeenCalledWith(tools, {
enabledContextCaching: true,
@@ -678,7 +683,7 @@ describe('LobeAnthropicAI', () => {
thinking: { type: 'enabled', budget_tokens: 0 },
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result).toEqual({
max_tokens: 4096,
@@ -706,7 +711,7 @@ describe('LobeAnthropicAI', () => {
thinking: { type: 'enabled', budget_tokens: 0 },
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result).toEqual({
max_tokens: 1000,
@@ -734,7 +739,7 @@ describe('LobeAnthropicAI', () => {
thinking: { type: 'enabled', budget_tokens: 2000 },
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result).toEqual({
max_tokens: 1000,
@@ -762,7 +767,7 @@ describe('LobeAnthropicAI', () => {
thinking: { type: 'enabled', budget_tokens: 60000 },
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result).toEqual({
max_tokens: 10000,
@@ -788,7 +793,7 @@ describe('LobeAnthropicAI', () => {
temperature: 0.7,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result.max_tokens).toBe(4096);
});
@@ -801,7 +806,7 @@ describe('LobeAnthropicAI', () => {
temperature: 0.7,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result.max_tokens).toBe(2000);
});
@@ -813,7 +818,7 @@ describe('LobeAnthropicAI', () => {
temperature: 1.0,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result.temperature).toBe(0.5); // Anthropic uses 0-1 scale, so divide by 2
});
@@ -829,7 +834,7 @@ describe('LobeAnthropicAI', () => {
// Delete the temperature property to simulate it not being provided
delete (partialPayload as any).temperature;
const result = await instance['buildAnthropicPayload'](partialPayload);
const result = await buildDefaultAnthropicPayload(partialPayload);
expect(result.temperature).toBeUndefined();
});
@@ -843,7 +848,7 @@ describe('LobeAnthropicAI', () => {
top_p: 0.9,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result.top_p).toBeUndefined();
});
@@ -856,7 +861,7 @@ describe('LobeAnthropicAI', () => {
top_p: 0.9,
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
expect(result.top_p).toBe(0.9);
});
@@ -869,20 +874,22 @@ describe('LobeAnthropicAI', () => {
thinking: { type: 'disabled', budget_tokens: 0 },
};
const result = await instance['buildAnthropicPayload'](payload);
const result = await buildDefaultAnthropicPayload(payload);
// When thinking is disabled, it should be treated as if thinking wasn't provided
expect(result).toEqual({
max_tokens: 4096,
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
temperature: 0.35,
});
expect(result).toEqual(
expect.objectContaining({
max_tokens: 4096,
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
temperature: 0.35,
}),
);
});
});
});

View File

@@ -1,286 +1,17 @@
import Anthropic, { ClientOptions } from '@anthropic-ai/sdk';
import { ModelProvider } from 'model-bank';
import { hasTemperatureTopPConflict } from '../../const/models';
import { LobeRuntimeAI } from '../../core/BaseAI';
import {
buildAnthropicMessages,
buildAnthropicTools,
buildSearchTool,
} from '../../core/contextBuilders/anthropic';
import { resolveParameters } from '../../core/parameterResolver';
import { AnthropicStream } from '../../core/streams';
import {
type ChatCompletionErrorPayload,
ChatMethodOptions,
ChatStreamPayload,
GenerateObjectOptions,
GenerateObjectPayload,
} from '../../types';
import { AgentRuntimeErrorType } from '../../types/error';
import { AgentRuntimeError } from '../../utils/createError';
import { debugStream } from '../../utils/debugStream';
import { desensitizeUrl } from '../../utils/desensitizeUrl';
import { getModelPricing } from '../../utils/getModelPricing';
import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
import { StreamingResponse } from '../../utils/response';
import { createAnthropicGenerateObject } from './generateObject';
import { handleAnthropicError } from './handleAnthropicError';
import { resolveCacheTTL } from './resolveCacheTTL';
import { resolveMaxTokens } from './resolveMaxTokens';
createAnthropicCompatibleParams,
createAnthropicCompatibleRuntime,
} from '../../core/anthropicCompatibleFactory';
export interface AnthropicModelCard {
created_at: string;
display_name: string;
id: string;
}
export const params = createAnthropicCompatibleParams({
debug: {
chatCompletion: () => process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1',
},
provider: ModelProvider.Anthropic,
});
type anthropicTools = Anthropic.Tool | Anthropic.WebSearchTool20250305;
const DEFAULT_BASE_URL = 'https://api.anthropic.com';
interface AnthropicAIParams extends ClientOptions {
id?: string;
}
export class LobeAnthropicAI implements LobeRuntimeAI {
private client: Anthropic;
baseURL: string;
apiKey?: string;
private id: string;
private isDebug() {
return process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1';
}
constructor({
apiKey,
baseURL = DEFAULT_BASE_URL,
id,
defaultHeaders,
...res
}: AnthropicAIParams = {}) {
if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey);
const betaHeaders = process.env.ANTHROPIC_BETA_HEADERS;
this.client = new Anthropic({
apiKey,
baseURL,
defaultHeaders: { ...defaultHeaders, 'anthropic-beta': betaHeaders },
...res,
});
this.baseURL = this.client.baseURL;
this.apiKey = apiKey;
this.id = id || ModelProvider.Anthropic;
}
async chat(payload: ChatStreamPayload, options?: ChatMethodOptions) {
try {
const anthropicPayload = await this.buildAnthropicPayload(payload);
const inputStartAt = Date.now();
if (this.isDebug()) {
console.log('[requestPayload]');
console.log(JSON.stringify(anthropicPayload), '\n');
}
const response = await this.client.messages.create(
{
...anthropicPayload,
metadata: options?.user ? { user_id: options?.user } : undefined,
stream: true,
},
{
signal: options?.signal,
},
);
const [prod, debug] = response.tee();
if (this.isDebug()) {
debugStream(debug.toReadableStream()).catch(console.error);
}
const pricing = await getModelPricing(payload.model, this.id);
const cacheTTL = resolveCacheTTL(payload, anthropicPayload);
const pricingOptions = cacheTTL ? { lookupParams: { ttl: cacheTTL } } : undefined;
return StreamingResponse(
AnthropicStream(prod, {
callbacks: options?.callback,
inputStartAt,
payload: { model: payload.model, pricing, pricingOptions, provider: this.id },
}),
{
headers: options?.headers,
},
);
} catch (error) {
throw this.handleError(error);
}
}
async generateObject(payload: GenerateObjectPayload, options?: GenerateObjectOptions) {
try {
return await createAnthropicGenerateObject(this.client, payload, options);
} catch (error) {
throw this.handleError(error);
}
}
private async buildAnthropicPayload(payload: ChatStreamPayload) {
const {
messages,
model,
max_tokens,
temperature,
top_p,
tools,
thinking,
enabledContextCaching = true,
enabledSearch,
} = payload;
const { anthropic: anthropicModels } = await import('model-bank');
const resolvedMaxTokens = await resolveMaxTokens({
max_tokens,
model,
providerModels: anthropicModels,
thinking,
});
const system_message = messages.find((m) => m.role === 'system');
const user_messages = messages.filter((m) => m.role !== 'system');
const systemPrompts = !!system_message?.content
? ([
{
cache_control: enabledContextCaching ? { type: 'ephemeral' } : undefined,
text: system_message?.content as string,
type: 'text',
},
] as Anthropic.TextBlockParam[])
: undefined;
const postMessages = await buildAnthropicMessages(user_messages, { enabledContextCaching });
let postTools: anthropicTools[] | undefined = buildAnthropicTools(tools, {
enabledContextCaching,
});
if (enabledSearch) {
const webSearchTool = buildSearchTool();
if (postTools && postTools.length > 0) {
postTools = [...postTools, webSearchTool];
} else {
postTools = [webSearchTool];
}
}
if (!!thinking && thinking.type === 'enabled') {
// `temperature` may only be set to 1 when thinking is enabled.
// `top_p` must be unset when thinking is enabled.
return {
max_tokens: resolvedMaxTokens,
messages: postMessages,
model,
system: systemPrompts,
thinking: {
...thinking,
budget_tokens: thinking?.budget_tokens
? Math.min(thinking.budget_tokens, resolvedMaxTokens - 1) // `max_tokens` must be greater than `thinking.budget_tokens`.
: 1024,
},
tools: postTools,
} satisfies Anthropic.MessageCreateParams;
}
// Resolve temperature and top_p parameters based on model constraints
const hasConflict = hasTemperatureTopPConflict(model);
const resolvedParams = resolveParameters(
{ temperature, top_p },
{ hasConflict, normalizeTemperature: true, preferTemperature: true },
);
return {
// claude 3 series model hax max output token of 4096, 3.x series has 8192
// https://docs.anthropic.com/en/docs/about-claude/models/all-models#:~:text=200K-,Max%20output,-Normal%3A
max_tokens: resolvedMaxTokens,
messages: postMessages,
model,
system: systemPrompts,
temperature: resolvedParams.temperature,
tools: postTools,
top_p: resolvedParams.top_p,
} satisfies Anthropic.MessageCreateParams;
}
async models() {
const url = `${this.baseURL}/v1/models`;
const response = await fetch(url, {
headers: {
'anthropic-version': '2023-06-01',
'x-api-key': `${this.apiKey}`,
},
method: 'GET',
});
const json = await response.json();
const modelList: AnthropicModelCard[] = json['data'];
const standardModelList = modelList.map((model) => ({
created: model.created_at,
displayName: model.display_name,
id: model.id,
}));
return processModelList(standardModelList, MODEL_LIST_CONFIGS.anthropic, 'anthropic');
}
private handleError(error: any): ChatCompletionErrorPayload {
let desensitizedEndpoint = this.baseURL;
if (this.baseURL !== DEFAULT_BASE_URL) {
desensitizedEndpoint = desensitizeUrl(this.baseURL);
}
if ('status' in (error as any)) {
switch ((error as Response).status) {
case 401: {
throw AgentRuntimeError.chat({
endpoint: desensitizedEndpoint,
error: error as any,
errorType: AgentRuntimeErrorType.InvalidProviderAPIKey,
provider: this.id,
});
}
case 403: {
throw AgentRuntimeError.chat({
endpoint: desensitizedEndpoint,
error: error as any,
errorType: AgentRuntimeErrorType.LocationNotSupportError,
provider: this.id,
});
}
default: {
break;
}
}
}
const { errorResult } = handleAnthropicError(error);
throw AgentRuntimeError.chat({
endpoint: desensitizedEndpoint,
error: errorResult,
errorType: AgentRuntimeErrorType.ProviderBizError,
provider: this.id,
});
}
}
export const LobeAnthropicAI = createAnthropicCompatibleRuntime(params);
export default LobeAnthropicAI;

View File

@@ -28,8 +28,8 @@ import { AgentRuntimeError } from '../../utils/createError';
import { debugStream } from '../../utils/debugStream';
import { getModelPricing } from '../../utils/getModelPricing';
import { StreamingResponse } from '../../utils/response';
import { resolveCacheTTL } from '../anthropic/resolveCacheTTL';
import { resolveMaxTokens } from '../anthropic/resolveMaxTokens';
import { resolveCacheTTL } from '../../core/anthropicCompatibleFactory/resolveCacheTTL';
import { resolveMaxTokens } from '../../core/anthropicCompatibleFactory/resolveMaxTokens';
/**
* A prompt constructor for HuggingFace LLama 2 chat models.

View File

@@ -1,57 +1,97 @@
// @vitest-environment node
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import OpenAI from 'openai';
import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { LobeOpenAICompatibleRuntime } from '../../core/BaseAI';
import { testProvider } from '../../providerTestUtils';
import { LobeMoonshotAI, params } from './index';
import {
LobeMoonshotAI,
LobeMoonshotAnthropicAI,
LobeMoonshotOpenAI,
anthropicParams,
params,
} from './index';
const provider = 'moonshot';
const defaultBaseURL = 'https://api.moonshot.cn/v1';
const defaultOpenAIBaseURL = 'https://api.moonshot.ai/v1';
const anthropicBaseURL = 'https://api.moonshot.ai/anthropic';
testProvider({
Runtime: LobeMoonshotAI,
provider,
defaultBaseURL,
chatDebugEnv: 'DEBUG_MOONSHOT_CHAT_COMPLETION',
chatModel: 'moonshot-v1-8k',
test: {
skipAPICall: true,
},
});
// Mock the console.error to avoid polluting test output
// Mock the console.error and console.warn to avoid polluting test output
vi.spyOn(console, 'error').mockImplementation(() => {});
vi.spyOn(console, 'warn').mockImplementation(() => {});
let instance: LobeOpenAICompatibleRuntime;
describe('LobeMoonshotAI', () => {
describe('RouterRuntime baseURL routing', () => {
it('should route to OpenAI format by default', async () => {
const runtime = new LobeMoonshotAI({ apiKey: 'test' });
expect(runtime).toBeInstanceOf(LobeMoonshotAI);
});
beforeEach(() => {
instance = new LobeMoonshotAI({ apiKey: 'test' });
it('should route to OpenAI format when baseURL ends with /v1', async () => {
const runtime = new LobeMoonshotAI({
apiKey: 'test',
baseURL: 'https://api.moonshot.ai/v1',
});
expect(runtime).toBeInstanceOf(LobeMoonshotAI);
});
// 使用 vi.spyOn 来模拟 chat.completions.create 方法
vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
new ReadableStream() as any,
);
});
it('should route to Anthropic format when baseURL ends with /anthropic', async () => {
const runtime = new LobeMoonshotAI({
apiKey: 'test',
baseURL: 'https://api.moonshot.ai/anthropic',
});
expect(runtime).toBeInstanceOf(LobeMoonshotAI);
});
afterEach(() => {
vi.clearAllMocks();
});
it('should route to Anthropic format when baseURL ends with /anthropic/', async () => {
const runtime = new LobeMoonshotAI({
apiKey: 'test',
baseURL: 'https://api.moonshot.ai/anthropic/',
});
expect(runtime).toBeInstanceOf(LobeMoonshotAI);
});
});
describe('LobeMoonshotAI - custom features', () => {
describe('Debug Configuration', () => {
it('should disable debug by default', () => {
delete process.env.DEBUG_MOONSHOT_CHAT_COMPLETION;
const result = params.debug.chatCompletion();
const result = anthropicParams.debug!.chatCompletion!();
expect(result).toBe(false);
});
it('should enable debug when env is set', () => {
process.env.DEBUG_MOONSHOT_CHAT_COMPLETION = '1';
const result = params.debug.chatCompletion();
const result = anthropicParams.debug!.chatCompletion!();
expect(result).toBe(true);
delete process.env.DEBUG_MOONSHOT_CHAT_COMPLETION;
});
});
});
describe('LobeMoonshotOpenAI', () => {
let instance: InstanceType<typeof LobeMoonshotOpenAI>;
const getLastRequestPayload = () => {
const calls = ((instance as any).client.chat.completions.create as Mock).mock.calls;
return calls[calls.length - 1]?.[0];
};
beforeEach(() => {
instance = new LobeMoonshotOpenAI({ apiKey: 'test' });
vi.spyOn((instance as any).client.chat.completions, 'create').mockResolvedValue(
new ReadableStream() as any,
);
});
afterEach(() => {
vi.clearAllMocks();
});
describe('init', () => {
it('should correctly initialize with an API key', async () => {
const runtime = new LobeMoonshotOpenAI({ apiKey: 'test_api_key' });
expect(runtime).toBeInstanceOf(LobeMoonshotOpenAI);
expect((runtime as any).baseURL).toEqual(defaultOpenAIBaseURL);
});
});
describe('handlePayload', () => {
describe('empty assistant messages', () => {
@@ -66,16 +106,12 @@ describe('LobeMoonshotAI - custom features', () => {
temperature: 0,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
messages: [
{ content: 'Hello', role: 'user' },
{ content: ' ', role: 'assistant' },
{ content: 'Follow-up', role: 'user' },
],
}),
expect.anything(),
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.content).toBe(' ');
});
it('should replace null content assistant message with a space', async () => {
@@ -88,36 +124,12 @@ describe('LobeMoonshotAI - custom features', () => {
temperature: 0,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
messages: [
{ content: 'Hello', role: 'user' },
{ content: ' ', role: 'assistant' },
],
}),
expect.anything(),
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
});
it('should replace undefined content assistant message with a space', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: undefined as any, role: 'assistant' },
],
model: 'moonshot-v1-8k',
temperature: 0,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
messages: [
{ content: 'Hello', role: 'user' },
{ content: ' ', role: 'assistant' },
],
}),
expect.anything(),
);
expect(assistantMessage?.content).toBe(' ');
});
it('should not modify non-empty assistant messages', async () => {
@@ -130,43 +142,193 @@ describe('LobeMoonshotAI - custom features', () => {
temperature: 0,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
messages: [
{ content: 'Hello', role: 'user' },
{ content: 'I am here', role: 'assistant' },
],
}),
expect.anything(),
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.content).toBe('I am here');
});
});
describe('web search functionality', () => {
it('should add web_search tool when enabledSearch is true', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
enabledSearch: true,
});
const payload = getLastRequestPayload();
expect(payload.tools).toEqual(
expect.arrayContaining([
expect.objectContaining({
type: 'builtin_function',
function: { name: '$web_search' },
}),
]),
);
});
it('should not modify user or system messages', async () => {
it('should not add web_search tool when enabledSearch is false', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
enabledSearch: false,
});
const payload = getLastRequestPayload();
expect(payload.tools).toBeUndefined();
});
});
describe('temperature normalization', () => {
it('should normalize temperature (divide by 2)', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0.8,
});
const payload = getLastRequestPayload();
expect(payload.temperature).toBe(0.4);
});
it('should normalize temperature to 0.5 when temperature is 1', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 1,
});
const payload = getLastRequestPayload();
expect(payload.temperature).toBe(0.5);
});
it('should normalize temperature to 0 when temperature is 0', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
});
const payload = getLastRequestPayload();
expect(payload.temperature).toBe(0);
});
it('should handle kimi-k2.5 model with thinking enabled by default', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
temperature: 0.5,
top_p: 0.8,
});
const payload = getLastRequestPayload();
expect(payload.temperature).toBe(1);
expect(payload.top_p).toBe(0.95);
expect(payload.frequency_penalty).toBe(0);
expect(payload.presence_penalty).toBe(0);
expect(payload.thinking).toEqual({ type: 'enabled' });
});
it('should handle kimi-k2.5 model with thinking disabled', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
thinking: { budget_tokens: 0, type: 'disabled' },
});
const payload = getLastRequestPayload();
expect(payload.temperature).toBe(0.6);
expect(payload.thinking).toEqual({ type: 'disabled' });
});
});
describe('interleaved thinking', () => {
it('should convert reasoning to reasoning_content for assistant messages', async () => {
await instance.chat({
messages: [
{ content: '', role: 'system' },
{ content: '', role: 'user' },
{ content: 'Hello', role: 'user' },
{
content: 'Response',
role: 'assistant',
reasoning: { content: 'My reasoning process' },
} as any,
],
model: 'moonshot-v1-8k',
temperature: 0.5,
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.reasoning_content).toBe('My reasoning process');
expect(assistantMessage?.reasoning).toBeUndefined();
});
});
});
});
describe('LobeMoonshotAnthropicAI', () => {
let instance: InstanceType<typeof LobeMoonshotAnthropicAI>;
const getLastRequestPayload = () => {
const calls = ((instance as any).client.messages.create as Mock).mock.calls;
return calls[calls.length - 1]?.[0];
};
beforeEach(() => {
instance = new LobeMoonshotAnthropicAI({ apiKey: 'test' });
vi.spyOn((instance as any).client.messages, 'create').mockResolvedValue(
new ReadableStream() as any,
);
});
afterEach(() => {
vi.clearAllMocks();
});
describe('init', () => {
it('should correctly initialize with an API key', async () => {
const runtime = new LobeMoonshotAnthropicAI({ apiKey: 'test_api_key' });
expect(runtime).toBeInstanceOf(LobeMoonshotAnthropicAI);
expect((runtime as any).baseURL).toEqual(anthropicBaseURL);
});
});
describe('handlePayload', () => {
describe('empty assistant messages', () => {
it('should replace empty string assistant message with a space', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: '', role: 'assistant' },
{ content: 'Follow-up', role: 'user' },
],
model: 'moonshot-v1-8k',
temperature: 0,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
messages: [
{ content: '', role: 'system' },
{ content: '', role: 'user' },
{ content: ' ', role: 'assistant' },
],
}),
expect.anything(),
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([expect.objectContaining({ text: ' ' })]),
);
});
});
describe('web search functionality', () => {
it('should add $web_search tool when enabledSearch is true', async () => {
it('should add web_search tool when enabledSearch is true', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
@@ -174,109 +336,15 @@ describe('LobeMoonshotAI - custom features', () => {
enabledSearch: true,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
tools: [
{
function: {
name: '$web_search',
},
type: 'builtin_function',
},
],
}),
expect.anything(),
);
});
const payload = getLastRequestPayload();
it('should add $web_search tool along with existing tools when enabledSearch is true', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
enabledSearch: true,
tools: [
{
type: 'function',
function: { name: 'custom_tool', description: 'A custom tool', parameters: {} },
},
],
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
tools: [
{
type: 'function',
function: { name: 'custom_tool', description: 'A custom tool', parameters: {} },
},
{
function: {
name: '$web_search',
},
type: 'builtin_function',
},
],
}),
expect.anything(),
);
});
it('should not add $web_search tool when enabledSearch is false', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
enabledSearch: false,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
tools: undefined,
}),
expect.anything(),
);
});
it('should not add $web_search tool when enabledSearch is not specified', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
tools: undefined,
}),
expect.anything(),
);
});
it('should preserve existing tools when enabledSearch is false', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
enabledSearch: false,
tools: [
{
type: 'function',
function: { name: 'custom_tool', description: 'A custom tool', parameters: {} },
},
],
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
tools: [
{
type: 'function',
function: { name: 'custom_tool', description: 'A custom tool', parameters: {} },
},
],
}),
expect.anything(),
expect(payload.tools).toEqual(
expect.arrayContaining([
expect.objectContaining({
type: 'builtin_function',
function: { name: '$web_search' },
}),
]),
);
});
});
@@ -289,216 +357,114 @@ describe('LobeMoonshotAI - custom features', () => {
temperature: 0.8,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
temperature: 0.4,
}),
expect.anything(),
);
});
it('should normalize temperature to 0.5 when temperature is 1', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 1,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
temperature: 0.5,
}),
expect.anything(),
);
});
it('should normalize temperature to 0 when temperature is 0', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
temperature: 0,
}),
expect.anything(),
);
});
it('should handle high temperature values (2.0 normalized to 1.0)', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 2,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
temperature: 1,
}),
expect.anything(),
);
});
it('should normalize negative temperature values', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: -1,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
temperature: -0.5,
}),
expect.anything(),
);
const payload = getLastRequestPayload();
expect(payload.temperature).toBe(0.4);
});
});
describe('other payload properties', () => {
it('should preserve other payload properties', async () => {
describe('kimi-k2.5 thinking support', () => {
it('should add thinking params for kimi-k2.5 model', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
temperature: 0.5,
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({
budget_tokens: 1024,
type: 'enabled',
});
expect(payload.temperature).toBe(1);
expect(payload.top_p).toBe(0.95);
});
it('should disable thinking when type is disabled', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
temperature: 0.5,
thinking: { budget_tokens: 0, type: 'disabled' },
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({ type: 'disabled' });
expect(payload.temperature).toBe(0.6);
});
it('should respect custom thinking budget', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
max_tokens: 4096,
thinking: { budget_tokens: 2048, type: 'enabled' },
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({
budget_tokens: 2048,
type: 'enabled',
});
});
it('should not add thinking params for non-kimi-k2.5 models', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0.5,
max_tokens: 100,
top_p: 0.9,
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
messages: [{ content: 'Hello', role: 'user' }],
model: 'moonshot-v1-8k',
temperature: 0.25,
max_tokens: 100,
top_p: 0.9,
}),
expect.anything(),
);
const payload = getLastRequestPayload();
expect(payload.thinking).toBeUndefined();
});
it('should combine all features together', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: '', role: 'assistant' },
{ content: 'Question?', role: 'user' },
],
model: 'moonshot-v1-8k',
temperature: 0.7,
max_tokens: 2000,
enabledSearch: true,
tools: [
{
type: 'function',
function: { name: 'custom_tool', description: 'A custom tool', parameters: {} },
},
],
});
expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
expect.objectContaining({
messages: [
{ content: 'Hello', role: 'user' },
{ content: ' ', role: 'assistant' },
{ content: 'Question?', role: 'user' },
],
model: 'moonshot-v1-8k',
temperature: 0.35,
max_tokens: 2000,
tools: [
{
type: 'function',
function: { name: 'custom_tool', description: 'A custom tool', parameters: {} },
},
{
function: {
name: '$web_search',
},
type: 'builtin_function',
},
],
}),
expect.anything(),
);
});
});
});
describe('models', () => {
  const mockClient = {
    models: {
      list: vi.fn(),
    },
  };

  // Shared invocation helper: runs the provider's model-listing function
  // against the mocked OpenAI-style client.
  const listModels = () => params.models({ client: mockClient as any });

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should fetch and process models successfully', async () => {
    mockClient.models.list.mockResolvedValue({
      data: [{ id: 'moonshot-v1-8k' }, { id: 'moonshot-v1-32k' }, { id: 'moonshot-v1-128k' }],
    });

    const result = await listModels();

    expect(mockClient.models.list).toHaveBeenCalledTimes(1);
    expect(result).toHaveLength(3);
    expect(result.map((m: any) => m.id)).toEqual([
      'moonshot-v1-8k',
      'moonshot-v1-32k',
      'moonshot-v1-128k',
    ]);
  });

  it('should handle single model', async () => {
    mockClient.models.list.mockResolvedValue({ data: [{ id: 'moonshot-v1-8k' }] });

    const result = await listModels();

    expect(result).toHaveLength(1);
    expect(result[0].id).toBe('moonshot-v1-8k');
  });

  it('should handle empty model list', async () => {
    mockClient.models.list.mockResolvedValue({ data: [] });

    expect(await listModels()).toEqual([]);
  });

  it('should process models with MODEL_LIST_CONFIGS', async () => {
    mockClient.models.list.mockResolvedValue({ data: [{ id: 'moonshot-v1-8k' }] });

    const result = await listModels();

    // The processModelList function should merge with known model list
    expect(result[0]).toHaveProperty('id');
    expect(result[0].id).toBe('moonshot-v1-8k');
  });

  it('should preserve model properties from API response', async () => {
    mockClient.models.list.mockResolvedValue({
      data: [
        { id: 'moonshot-v1-8k', extra_field: 'value' },
        { id: 'moonshot-v1-32k', another_field: 123 },
      ],
    });

    const result = await listModels();

    expect(result).toHaveLength(2);
    expect(result.map((m: any) => m.id)).toEqual(['moonshot-v1-8k', 'moonshot-v1-32k']);
  });
});
});
describe('models', () => {
  const fetchModels = params.models as (params: { client: OpenAI }) => Promise<any[]>;

  // Builds a stubbed OpenAI client whose models.list resolves/rejects per test.
  const stubClient = (list: ReturnType<typeof vi.fn>) =>
    ({ models: { list } }) as unknown as OpenAI;

  it('should use OpenAI client to fetch models', async () => {
    const list = vi.fn().mockResolvedValue({
      data: [{ id: 'moonshot-v1-8k' }, { id: 'moonshot-v1-32k' }],
    });
    const client = stubClient(list);

    const result = await fetchModels({ client });

    expect(list).toHaveBeenCalled();
    expect(result).toHaveLength(2);
    expect(result[0].id).toBe('moonshot-v1-8k');
  });

  it('should handle empty model list', async () => {
    const client = stubClient(vi.fn().mockResolvedValue({ data: [] }));

    expect(await fetchModels({ client })).toEqual([]);
  });

  it('should handle fetch error gracefully', async () => {
    // fetchMoonshotModels swallows listing failures and yields an empty array.
    const client = stubClient(vi.fn().mockRejectedValue(new Error('Network error')));

    expect(await fetchModels({ client })).toEqual([]);
  });
});

View File

@@ -1,99 +1,245 @@
import type Anthropic from '@anthropic-ai/sdk';
import type { ChatModelCard } from '@lobechat/types';
import { ModelProvider } from 'model-bank';
import OpenAI from 'openai';
import { CreateRouterRuntimeOptions, createRouterRuntime } from '../../core/RouterRuntime';
import {
type OpenAICompatibleFactoryOptions,
createOpenAICompatibleRuntime,
} from '../../core/openaiCompatibleFactory';
import { resolveParameters } from '../../core/parameterResolver';
buildDefaultAnthropicPayload,
createAnthropicCompatibleParams,
createAnthropicCompatibleRuntime,
} from '../../core/anthropicCompatibleFactory';
import { createOpenAICompatibleRuntime } from '../../core/openaiCompatibleFactory';
import { ChatStreamPayload } from '../../types';
import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
import { MODEL_LIST_CONFIGS, processModelList } from '../../utils/modelParse';
/**
 * Shape of a single model entry as returned by Moonshot's `models.list` API
 * (consumed by the model-list fetcher below).
 */
export interface MoonshotModelCard {
  id: string;
}
export const params = {
baseURL: 'https://api.moonshot.cn/v1',
// Default endpoint for the OpenAI-compatible Moonshot API format.
const DEFAULT_MOONSHOT_BASE_URL = 'https://api.moonshot.ai/v1';
// Default endpoint for the Anthropic-compatible Moonshot API format.
const DEFAULT_MOONSHOT_ANTHROPIC_BASE_URL = 'https://api.moonshot.ai/anthropic';
/**
 * Normalize empty assistant messages by substituting a single-space text
 * content part, since Moonshot rejects empty assistant turns (#8418).
 */
const normalizeMoonshotMessages = (messages: ChatStreamPayload['messages']) =>
  messages.map((message) => {
    const isEmptyAssistant =
      message.role === 'assistant' &&
      (message.content === '' || message.content === null || message.content === undefined);

    if (!isEmptyAssistant) return message;

    return { ...message, content: [{ text: ' ', type: 'text' as const }] };
  });
/**
 * Append Moonshot's builtin `$web_search` tool when search is enabled;
 * otherwise return the tool list untouched.
 */
const appendMoonshotSearchTool = (
  tools: Anthropic.MessageCreateParams['tools'] | undefined,
  enabledSearch?: boolean,
) => {
  if (!enabledSearch) return tools;

  // Moonshot-specific builtin tool shape, not part of the Anthropic SDK types.
  const searchTool = {
    function: { name: '$web_search' },
    type: 'builtin_function',
  } as any;

  return [...(tools ?? []), searchTool];
};
/**
* Build Moonshot Anthropic format payload with special handling for kimi-k2.5 thinking
*/
const buildMoonshotAnthropicPayload = async (
payload: ChatStreamPayload,
): Promise<Anthropic.MessageCreateParams> => {
const normalizedMessages = normalizeMoonshotMessages(payload.messages);
const resolvedMaxTokens =
payload.max_tokens ??
(await getModelPropertyWithFallback<number | undefined>(
payload.model,
'maxOutput',
ModelProvider.Moonshot,
)) ??
8192;
const basePayload = await buildDefaultAnthropicPayload({
...payload,
enabledSearch: false,
max_tokens: resolvedMaxTokens,
messages: normalizedMessages,
});
const tools = appendMoonshotSearchTool(basePayload.tools, payload.enabledSearch);
const basePayloadWithSearch = { ...basePayload, tools };
const isK25Model = payload.model === 'kimi-k2.5';
if (!isK25Model) return basePayloadWithSearch;
const resolvedThinkingBudget = payload.thinking?.budget_tokens
? Math.min(payload.thinking.budget_tokens, resolvedMaxTokens - 1)
: 1024;
const thinkingParam =
payload.thinking?.type === 'disabled'
? ({ type: 'disabled' } as const)
: ({ budget_tokens: resolvedThinkingBudget, type: 'enabled' } as const);
const isThinkingEnabled = thinkingParam.type === 'enabled';
return {
...basePayloadWithSearch,
temperature: isThinkingEnabled ? 1 : 0.6,
thinking: thinkingParam,
top_p: 0.95,
};
};
/**
 * Build a Moonshot payload in OpenAI chat-completions format.
 *
 * Handles three Moonshot quirks:
 * - empty assistant messages become a single space (#8418);
 * - assistant `reasoning` is converted to `reasoning_content` for
 *   interleaved thinking (only when unsigned and non-empty);
 * - temperature is halved for standard models, while kimi-k2.5 uses fixed
 *   sampling parameters plus a `thinking` flag.
 */
const buildMoonshotOpenAIPayload = (
  payload: ChatStreamPayload,
): OpenAI.ChatCompletionCreateParamsStreaming => {
  const { enabledSearch, messages, model, temperature, thinking, tools, ...rest } = payload;

  const normalizedMessages = messages.map((message: any) => {
    const isAssistant = message.role === 'assistant';
    const hasEmptyContent =
      message.content === '' || message.content === null || message.content === undefined;

    // Add a space for empty assistant messages (#8418)
    const withContent = isAssistant && hasEmptyContent ? { ...message, content: ' ' } : message;

    // Interleaved thinking: convert reasoning to reasoning_content
    if (isAssistant && message.reasoning) {
      const { reasoning, ...stripped } = withContent;
      return !reasoning.signature && reasoning.content
        ? { ...stripped, reasoning_content: reasoning.content }
        : stripped;
    }

    return withContent;
  });

  const moonshotTools = enabledSearch
    ? [...(tools || []), { function: { name: '$web_search' }, type: 'builtin_function' }]
    : tools;

  if (model === 'kimi-k2.5') {
    const thinkingParam = thinking?.type === 'disabled' ? { type: 'disabled' } : { type: 'enabled' };

    return {
      ...rest,
      frequency_penalty: 0,
      messages: normalizedMessages,
      model,
      presence_penalty: 0,
      stream: payload.stream ?? true,
      temperature: thinkingParam.type === 'enabled' ? 1 : 0.6,
      thinking: thinkingParam,
      tools: moonshotTools?.length ? moonshotTools : undefined,
      top_p: 0.95,
    } as any;
  }

  // Moonshot temperature is normalized by dividing by 2
  return {
    ...rest,
    messages: normalizedMessages,
    model,
    stream: payload.stream ?? true,
    temperature: temperature === undefined ? undefined : temperature / 2,
    tools: moonshotTools?.length ? moonshotTools : undefined,
  } as OpenAI.ChatCompletionCreateParamsStreaming;
};
/**
 * Fetch the Moonshot model catalog via an OpenAI-compatible client.
 *
 * Listing is best-effort: any failure is logged and an empty list is
 * returned rather than propagating the error to the caller.
 */
const fetchMoonshotModels = async ({ client }: { client: OpenAI }): Promise<ChatModelCard[]> => {
  try {
    const page = (await client.models.list()) as any;
    const cards: MoonshotModelCard[] = page.data || [];
    return processModelList(cards, MODEL_LIST_CONFIGS.moonshot, 'moonshot');
  } catch (error) {
    console.warn('Failed to fetch Moonshot models:', error);
    return [];
  }
};
/**
 * Moonshot Anthropic format runtime
 *
 * Uses the shared Anthropic-compatible factory with a Moonshot-specific
 * payload builder (kimi-k2.5 thinking handling lives in
 * buildMoonshotAnthropicPayload above).
 */
export const anthropicParams = createAnthropicCompatibleParams({
  baseURL: DEFAULT_MOONSHOT_ANTHROPIC_BASE_URL,
  chatCompletion: {
    handlePayload: buildMoonshotAnthropicPayload,
  },
  customClient: {},
  debug: {
    // Opt-in request debugging via environment flag.
    chatCompletion: () => process.env.DEBUG_MOONSHOT_CHAT_COMPLETION === '1',
  },
  provider: ModelProvider.Moonshot,
});

export const LobeMoonshotAnthropicAI = createAnthropicCompatibleRuntime(anthropicParams);
/**
* Moonshot OpenAI format runtime
*/
export const LobeMoonshotOpenAI = createOpenAICompatibleRuntime({
baseURL: DEFAULT_MOONSHOT_BASE_URL,
chatCompletion: {
forceImageBase64: true,
handlePayload: (payload: ChatStreamPayload) => {
const { enabledSearch, messages, model, temperature, thinking, tools, ...rest } = payload;
const filteredMessages = messages.map((message: any) => {
let normalizedMessage = message;
// Add a space for empty assistant messages (#8418)
if (message.role === 'assistant' && (!message.content || message.content === '')) {
normalizedMessage = { ...normalizedMessage, content: ' ' };
}
// Interleaved thinking
if (message.role === 'assistant' && message.reasoning) {
const { reasoning, ...messageWithoutReasoning } = normalizedMessage;
return {
...messageWithoutReasoning,
...(!reasoning.signature && reasoning.content
? { reasoning_content: reasoning.content }
: {}),
};
}
return normalizedMessage;
});
const moonshotTools = enabledSearch
? [
...(tools || []),
{
function: {
name: '$web_search',
},
type: 'builtin_function',
},
]
: tools;
const isK25Model = model === 'kimi-k2.5';
if (isK25Model) {
const thinkingParam =
thinking?.type === 'disabled' ? { type: 'disabled' } : { type: 'enabled' };
const isThinkingEnabled = thinkingParam.type === 'enabled';
return {
...rest,
frequency_penalty: 0,
messages: filteredMessages,
model,
presence_penalty: 0,
temperature: isThinkingEnabled ? 1 : 0.6,
thinking: thinkingParam,
tools: moonshotTools,
top_p: 0.95,
} as any;
}
// Resolve parameters with normalization for non-K2.5 models
const resolvedParams = resolveParameters({ temperature }, { normalizeTemperature: true });
return {
...rest,
messages: filteredMessages,
model,
temperature: resolvedParams.temperature,
tools: moonshotTools,
} as any;
},
handlePayload: buildMoonshotOpenAIPayload,
},
debug: {
chatCompletion: () => process.env.DEBUG_MOONSHOT_CHAT_COMPLETION === '1',
},
models: async ({ client }) => {
const modelsPage = (await client.models.list()) as any;
const modelList: MoonshotModelCard[] = modelsPage.data;
return processModelList(modelList, MODEL_LIST_CONFIGS.moonshot, 'moonshot');
},
provider: ModelProvider.Moonshot,
} satisfies OpenAICompatibleFactoryOptions;
});
export const LobeMoonshotAI = createOpenAICompatibleRuntime(params);
/**
 * RouterRuntime configuration for Moonshot
 * Routes to Anthropic format for /anthropic URLs, otherwise uses OpenAI format
 */
export const params: CreateRouterRuntimeOptions = {
  id: ModelProvider.Moonshot,
  // Model listing is shared by both routes and uses the OpenAI client.
  models: fetchMoonshotModels,
  routers: [
    {
      apiType: 'anthropic',
      // Selected when the configured baseURL ends in "/anthropic"
      // (an optional trailing slash is tolerated).
      baseURLPattern: /\/anthropic\/?$/,
      options: {},
      runtime: LobeMoonshotAnthropicAI,
    },
    {
      // No baseURLPattern: acts as the fallback route for every other
      // baseURL (e.g. the default https://api.moonshot.ai/v1).
      apiType: 'openai',
      options: {},
      runtime: LobeMoonshotOpenAI,
    },
  ],
};

export const LobeMoonshotAI = createRouterRuntime(params);