mirror of
https://github.com/lobehub/lobehub.git
synced 2026-03-27 13:29:15 +07:00
🐛 fix(model-runtime): fix moonshot interleaved thinking and circular dependency (#12112)
* 🐛 fix(model-runtime): fix moonshot interleaved thinking and circular dependency
  - Add apiTypes.ts to break circular dependency between createRuntime and baseRuntimeMap
  - Use dynamic import for baseRuntimeMap in createRuntime.ts
  - Add moonshot to baseRuntimeMap for RouterRuntime support
  - Convert reasoning to thinking block in normalizeMoonshotMessages for Anthropic format
  - Add tests for interleaved thinking scenarios
* ♻️ refactor(moonshot): extract shared helpers to reduce code duplication
* 🐛 fix(AgentSetting): center Empty component in opening questions
This commit is contained in:
18
packages/model-runtime/src/core/RouterRuntime/apiTypes.ts
Normal file
18
packages/model-runtime/src/core/RouterRuntime/apiTypes.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import type { LobeRuntimeAI } from '../BaseAI';
|
||||
|
||||
export type ApiType =
|
||||
| 'anthropic'
|
||||
| 'azure'
|
||||
| 'bedrock'
|
||||
| 'cloudflare'
|
||||
| 'deepseek'
|
||||
| 'fal'
|
||||
| 'google'
|
||||
| 'minimax'
|
||||
| 'moonshot'
|
||||
| 'openai'
|
||||
| 'qwen'
|
||||
| 'vertexai'
|
||||
| 'xai';
|
||||
|
||||
export type RuntimeClass = new (options?: any) => LobeRuntimeAI;
|
||||
@@ -6,10 +6,12 @@ import { LobeDeepSeekAI } from '../../providers/deepseek';
|
||||
import { LobeFalAI } from '../../providers/fal';
|
||||
import { LobeGoogleAI } from '../../providers/google';
|
||||
import { LobeMinimaxAI } from '../../providers/minimax';
|
||||
import { LobeMoonshotAI } from '../../providers/moonshot';
|
||||
import { LobeOpenAI } from '../../providers/openai';
|
||||
import { LobeQwenAI } from '../../providers/qwen';
|
||||
import { LobeVertexAI } from '../../providers/vertexai';
|
||||
import { LobeXAI } from '../../providers/xai';
|
||||
import type { ApiType, RuntimeClass } from './apiTypes';
|
||||
|
||||
export const baseRuntimeMap = {
|
||||
anthropic: LobeAnthropicAI,
|
||||
@@ -20,8 +22,9 @@ export const baseRuntimeMap = {
|
||||
fal: LobeFalAI,
|
||||
google: LobeGoogleAI,
|
||||
minimax: LobeMinimaxAI,
|
||||
moonshot: LobeMoonshotAI,
|
||||
openai: LobeOpenAI,
|
||||
qwen: LobeQwenAI,
|
||||
vertexai: LobeVertexAI,
|
||||
xai: LobeXAI,
|
||||
};
|
||||
} satisfies Record<ApiType, RuntimeClass>;
|
||||
|
||||
@@ -29,7 +29,7 @@ import { postProcessModelList } from '../../utils/postProcessModelList';
|
||||
import { safeParseJSON } from '../../utils/safeParseJSON';
|
||||
import { LobeRuntimeAI } from '../BaseAI';
|
||||
import { CreateImageOptions, CustomClientOptions } from '../openaiCompatibleFactory';
|
||||
import { baseRuntimeMap } from './baseRuntimeMap';
|
||||
import type { ApiType, RuntimeClass } from './apiTypes';
|
||||
|
||||
const log = debug('lobe-model-runtime:router-runtime');
|
||||
|
||||
@@ -51,17 +51,15 @@ interface ProviderIniOptions extends Record<string, any> {
|
||||
* `apiType` allows switching provider when falling back.
|
||||
*/
|
||||
interface RouterOptionItem extends ProviderIniOptions {
|
||||
apiType?: keyof typeof baseRuntimeMap;
|
||||
apiType?: ApiType;
|
||||
id?: string;
|
||||
remark?: string;
|
||||
}
|
||||
|
||||
type RouterOptions = RouterOptionItem | RouterOptionItem[];
|
||||
|
||||
export type RuntimeClass = new (options?: any) => LobeRuntimeAI;
|
||||
|
||||
interface RouterInstance {
|
||||
apiType: keyof typeof baseRuntimeMap;
|
||||
apiType: ApiType;
|
||||
baseURLPattern?: RegExp;
|
||||
models?: string[];
|
||||
options: RouterOptions;
|
||||
@@ -213,15 +211,15 @@ export const createRouterRuntime = ({
|
||||
* Build a runtime instance for a specific option item.
|
||||
* Option items can override apiType to switch providers for fallback.
|
||||
*/
|
||||
private createRuntimeFromOption(
|
||||
private async createRuntimeFromOption(
|
||||
router: RouterInstance,
|
||||
optionItem: RouterOptionItem,
|
||||
): {
|
||||
): Promise<{
|
||||
channelId?: string;
|
||||
id: keyof typeof baseRuntimeMap;
|
||||
id: ApiType;
|
||||
remark?: string;
|
||||
runtime: LobeRuntimeAI;
|
||||
} {
|
||||
}> {
|
||||
const { apiType: optionApiType, id: channelId, remark, ...optionOverrides } = optionItem;
|
||||
const resolvedApiType = optionApiType ?? router.apiType;
|
||||
const finalOptions = { ...this._params, ...this._options, ...optionOverrides };
|
||||
@@ -255,6 +253,7 @@ export const createRouterRuntime = ({
|
||||
};
|
||||
}
|
||||
|
||||
const { baseRuntimeMap } = await import('./baseRuntimeMap');
|
||||
const providerAI =
|
||||
resolvedApiType === router.apiType
|
||||
? (router.runtime ?? baseRuntimeMap[resolvedApiType] ?? LobeOpenAI)
|
||||
@@ -293,7 +292,7 @@ export const createRouterRuntime = ({
|
||||
id: resolvedApiType,
|
||||
remark,
|
||||
runtime,
|
||||
} = this.createRuntimeFromOption(matchedRouter, optionItem);
|
||||
} = await this.createRuntimeFromOption(matchedRouter, optionItem);
|
||||
|
||||
try {
|
||||
const result = await requestHandler(runtime);
|
||||
@@ -344,19 +343,21 @@ export const createRouterRuntime = ({
|
||||
|
||||
async models() {
|
||||
const resolvedRouters = await this.resolveRouters();
|
||||
const runtimes = resolvedRouters.map((router) => {
|
||||
const routerOptions = this.normalizeRouterOptions(router);
|
||||
const { id: resolvedApiType, runtime } = this.createRuntimeFromOption(
|
||||
router,
|
||||
routerOptions[0],
|
||||
);
|
||||
const runtimes = await Promise.all(
|
||||
resolvedRouters.map(async (router) => {
|
||||
const routerOptions = this.normalizeRouterOptions(router);
|
||||
const { id: resolvedApiType, runtime } = await this.createRuntimeFromOption(
|
||||
router,
|
||||
routerOptions[0],
|
||||
);
|
||||
|
||||
return {
|
||||
id: resolvedApiType,
|
||||
models: router.models,
|
||||
runtime,
|
||||
};
|
||||
});
|
||||
return {
|
||||
id: resolvedApiType,
|
||||
models: router.models,
|
||||
runtime,
|
||||
};
|
||||
}),
|
||||
);
|
||||
|
||||
if (modelsOption && typeof modelsOption === 'function') {
|
||||
// If it's a functional configuration, use the last runtime's client to call the function
|
||||
|
||||
@@ -422,6 +422,118 @@ describe('LobeMoonshotAnthropicAI', () => {
|
||||
expect(payload.thinking).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('interleaved thinking', () => {
|
||||
it('should convert reasoning to thinking block for assistant messages', async () => {
|
||||
await instance.chat({
|
||||
messages: [
|
||||
{ content: 'Hello', role: 'user' },
|
||||
{
|
||||
content: 'Response',
|
||||
role: 'assistant',
|
||||
reasoning: { content: 'My reasoning process' },
|
||||
} as any,
|
||||
{ content: 'Follow-up', role: 'user' },
|
||||
],
|
||||
model: 'kimi-k2.5',
|
||||
});
|
||||
|
||||
const payload = getLastRequestPayload();
|
||||
const assistantMessage = payload.messages.find(
|
||||
(message: any) => message.role === 'assistant',
|
||||
);
|
||||
|
||||
expect(assistantMessage?.content).toEqual([
|
||||
{ type: 'thinking', thinking: 'My reasoning process' },
|
||||
{ type: 'text', text: 'Response' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should handle empty content with reasoning', async () => {
|
||||
await instance.chat({
|
||||
messages: [
|
||||
{ content: 'Hello', role: 'user' },
|
||||
{
|
||||
content: '',
|
||||
role: 'assistant',
|
||||
reasoning: { content: 'My reasoning process' },
|
||||
} as any,
|
||||
{ content: 'Follow-up', role: 'user' },
|
||||
],
|
||||
model: 'kimi-k2.5',
|
||||
});
|
||||
|
||||
const payload = getLastRequestPayload();
|
||||
const assistantMessage = payload.messages.find(
|
||||
(message: any) => message.role === 'assistant',
|
||||
);
|
||||
|
||||
expect(assistantMessage?.content).toEqual([
|
||||
{ type: 'thinking', thinking: 'My reasoning process' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should not add thinking block when reasoning has signature', async () => {
|
||||
await instance.chat({
|
||||
messages: [
|
||||
{ content: 'Hello', role: 'user' },
|
||||
{
|
||||
content: 'Response',
|
||||
role: 'assistant',
|
||||
reasoning: { content: 'My reasoning', signature: 'some-signature' },
|
||||
} as any,
|
||||
{ content: 'Follow-up', role: 'user' },
|
||||
],
|
||||
model: 'kimi-k2.5',
|
||||
});
|
||||
|
||||
const payload = getLastRequestPayload();
|
||||
const assistantMessage = payload.messages.find(
|
||||
(message: any) => message.role === 'assistant',
|
||||
);
|
||||
|
||||
// Should not have thinking block, just text
|
||||
expect(assistantMessage?.content).toBe('Response');
|
||||
});
|
||||
|
||||
it('should handle assistant message with tool_calls and reasoning', async () => {
|
||||
await instance.chat({
|
||||
messages: [
|
||||
{ content: 'Hello', role: 'user' },
|
||||
{
|
||||
content: '',
|
||||
role: 'assistant',
|
||||
reasoning: { content: 'Thinking about tools' },
|
||||
tool_calls: [
|
||||
{
|
||||
id: 'call_1',
|
||||
type: 'function',
|
||||
function: { name: 'get_weather', arguments: '{"city":"Beijing"}' },
|
||||
},
|
||||
],
|
||||
} as any,
|
||||
{
|
||||
content: '{"temp": 20}',
|
||||
role: 'tool',
|
||||
tool_call_id: 'call_1',
|
||||
} as any,
|
||||
],
|
||||
model: 'kimi-k2.5',
|
||||
});
|
||||
|
||||
const payload = getLastRequestPayload();
|
||||
const assistantMessage = payload.messages.find(
|
||||
(message: any) => message.role === 'assistant',
|
||||
);
|
||||
|
||||
expect(assistantMessage?.content).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ type: 'thinking', thinking: 'Thinking about tools' }),
|
||||
expect.objectContaining({ type: 'tool_use', name: 'get_weather' }),
|
||||
]),
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -21,34 +21,59 @@ export interface MoonshotModelCard {
|
||||
const DEFAULT_MOONSHOT_BASE_URL = 'https://api.moonshot.ai/v1';
|
||||
const DEFAULT_MOONSHOT_ANTHROPIC_BASE_URL = 'https://api.moonshot.ai/anthropic';
|
||||
|
||||
/**
|
||||
* Normalize empty assistant messages by adding a space placeholder (#8418)
|
||||
*/
|
||||
const normalizeMoonshotMessages = (messages: ChatStreamPayload['messages']) =>
|
||||
messages.map((message) => {
|
||||
if (message.role !== 'assistant') return message;
|
||||
if (message.content !== '' && message.content !== null && message.content !== undefined)
|
||||
return message;
|
||||
// Shared constants and helpers
|
||||
const MOONSHOT_SEARCH_TOOL = { function: { name: '$web_search' }, type: 'builtin_function' } as any;
|
||||
const isKimiK25Model = (model: string) => model === 'kimi-k2.5';
|
||||
const isEmptyContent = (content: any) =>
|
||||
content === '' || content === null || content === undefined;
|
||||
const hasValidReasoning = (reasoning: any) => reasoning?.content && !reasoning?.signature;
|
||||
|
||||
return { ...message, content: [{ text: ' ', type: 'text' as const }] };
|
||||
const getK25Params = (isThinkingEnabled: boolean) => ({
|
||||
temperature: isThinkingEnabled ? 1 : 0.6,
|
||||
top_p: 0.95,
|
||||
});
|
||||
|
||||
const appendSearchTool = <T>(tools: T[] | undefined, enabledSearch?: boolean): T[] | undefined => {
|
||||
if (!enabledSearch) return tools;
|
||||
return tools?.length ? [...tools, MOONSHOT_SEARCH_TOOL] : [MOONSHOT_SEARCH_TOOL];
|
||||
};
|
||||
|
||||
// Anthropic format helpers
|
||||
const buildThinkingBlock = (reasoning: any) =>
|
||||
hasValidReasoning(reasoning) ? { thinking: reasoning.content, type: 'thinking' as const } : null;
|
||||
|
||||
const toContentArray = (content: any) =>
|
||||
Array.isArray(content) ? content : [{ text: content, type: 'text' as const }];
|
||||
|
||||
const normalizeMessagesForAnthropic = (messages: ChatStreamPayload['messages']) =>
|
||||
messages.map((message: any) => {
|
||||
if (message.role !== 'assistant') return message;
|
||||
|
||||
const { reasoning, ...rest } = message;
|
||||
const thinkingBlock = buildThinkingBlock(reasoning);
|
||||
|
||||
if (isEmptyContent(message.content)) {
|
||||
const placeholder = { text: ' ', type: 'text' as const };
|
||||
return { ...rest, content: thinkingBlock ? [thinkingBlock] : [placeholder] };
|
||||
}
|
||||
|
||||
if (!thinkingBlock) return rest;
|
||||
return { ...rest, content: [thinkingBlock, ...toContentArray(message.content)] };
|
||||
});
|
||||
|
||||
/**
|
||||
* Append Moonshot web search tool for builtin search capability
|
||||
*/
|
||||
const appendMoonshotSearchTool = (
|
||||
tools: Anthropic.MessageCreateParams['tools'] | undefined,
|
||||
enabledSearch?: boolean,
|
||||
) => {
|
||||
if (!enabledSearch) return tools;
|
||||
// OpenAI format helpers
|
||||
const normalizeMessagesForOpenAI = (messages: ChatStreamPayload['messages']) =>
|
||||
messages.map((message: any) => {
|
||||
if (message.role !== 'assistant') return message;
|
||||
|
||||
const moonshotSearchTool = {
|
||||
function: { name: '$web_search' },
|
||||
type: 'builtin_function',
|
||||
} as any;
|
||||
const { reasoning, ...rest } = message;
|
||||
const normalized = isEmptyContent(message.content) ? { ...rest, content: ' ' } : rest;
|
||||
|
||||
return tools?.length ? [...tools, moonshotSearchTool] : [moonshotSearchTool];
|
||||
};
|
||||
if (hasValidReasoning(reasoning)) {
|
||||
return { ...normalized, reasoning_content: reasoning.content };
|
||||
}
|
||||
return normalized;
|
||||
});
|
||||
|
||||
/**
|
||||
* Build Moonshot Anthropic format payload with special handling for kimi-k2.5 thinking
|
||||
@@ -56,7 +81,6 @@ const appendMoonshotSearchTool = (
|
||||
const buildMoonshotAnthropicPayload = async (
|
||||
payload: ChatStreamPayload,
|
||||
): Promise<Anthropic.MessageCreateParams> => {
|
||||
const normalizedMessages = normalizeMoonshotMessages(payload.messages);
|
||||
const resolvedMaxTokens =
|
||||
payload.max_tokens ??
|
||||
(await getModelPropertyWithFallback<number | undefined>(
|
||||
@@ -70,14 +94,13 @@ const buildMoonshotAnthropicPayload = async (
|
||||
...payload,
|
||||
enabledSearch: false,
|
||||
max_tokens: resolvedMaxTokens,
|
||||
messages: normalizedMessages,
|
||||
messages: normalizeMessagesForAnthropic(payload.messages),
|
||||
});
|
||||
|
||||
const tools = appendMoonshotSearchTool(basePayload.tools, payload.enabledSearch);
|
||||
const tools = appendSearchTool(basePayload.tools, payload.enabledSearch);
|
||||
const basePayloadWithSearch = { ...basePayload, tools };
|
||||
|
||||
const isK25Model = payload.model === 'kimi-k2.5';
|
||||
if (!isK25Model) return basePayloadWithSearch;
|
||||
if (!isKimiK25Model(payload.model)) return basePayloadWithSearch;
|
||||
|
||||
const resolvedThinkingBudget = payload.thinking?.budget_tokens
|
||||
? Math.min(payload.thinking.budget_tokens, resolvedMaxTokens - 1)
|
||||
@@ -86,13 +109,11 @@ const buildMoonshotAnthropicPayload = async (
|
||||
payload.thinking?.type === 'disabled'
|
||||
? ({ type: 'disabled' } as const)
|
||||
: ({ budget_tokens: resolvedThinkingBudget, type: 'enabled' } as const);
|
||||
const isThinkingEnabled = thinkingParam.type === 'enabled';
|
||||
|
||||
return {
|
||||
...basePayloadWithSearch,
|
||||
temperature: isThinkingEnabled ? 1 : 0.6,
|
||||
...getK25Params(thinkingParam.type === 'enabled'),
|
||||
thinking: thinkingParam,
|
||||
top_p: 0.95,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -104,71 +125,33 @@ const buildMoonshotOpenAIPayload = (
|
||||
): OpenAI.ChatCompletionCreateParamsStreaming => {
|
||||
const { enabledSearch, messages, model, temperature, thinking, tools, ...rest } = payload;
|
||||
|
||||
// Normalize messages: handle empty assistant messages and interleaved thinking
|
||||
const normalizedMessages = messages.map((message: any) => {
|
||||
let normalizedMessage = message;
|
||||
const normalizedMessages = normalizeMessagesForOpenAI(messages);
|
||||
const moonshotTools = appendSearchTool(tools, enabledSearch);
|
||||
|
||||
// Add a space for empty assistant messages (#8418)
|
||||
if (
|
||||
message.role === 'assistant' &&
|
||||
(message.content === '' || message.content === null || message.content === undefined)
|
||||
) {
|
||||
normalizedMessage = { ...normalizedMessage, content: ' ' };
|
||||
}
|
||||
|
||||
// Interleaved thinking: convert reasoning to reasoning_content
|
||||
if (message.role === 'assistant' && message.reasoning) {
|
||||
const { reasoning, ...messageWithoutReasoning } = normalizedMessage;
|
||||
return {
|
||||
...messageWithoutReasoning,
|
||||
...(!reasoning.signature && reasoning.content
|
||||
? { reasoning_content: reasoning.content }
|
||||
: {}),
|
||||
};
|
||||
}
|
||||
return normalizedMessage;
|
||||
});
|
||||
|
||||
const moonshotTools = enabledSearch
|
||||
? [
|
||||
...(tools || []),
|
||||
{
|
||||
function: { name: '$web_search' },
|
||||
type: 'builtin_function',
|
||||
},
|
||||
]
|
||||
: tools;
|
||||
|
||||
const isK25Model = model === 'kimi-k2.5';
|
||||
|
||||
if (isK25Model) {
|
||||
if (isKimiK25Model(model)) {
|
||||
const thinkingParam =
|
||||
thinking?.type === 'disabled' ? { type: 'disabled' } : { type: 'enabled' };
|
||||
const isThinkingEnabled = thinkingParam.type === 'enabled';
|
||||
|
||||
return {
|
||||
...rest,
|
||||
...getK25Params(thinkingParam.type === 'enabled'),
|
||||
frequency_penalty: 0,
|
||||
messages: normalizedMessages,
|
||||
model,
|
||||
presence_penalty: 0,
|
||||
stream: payload.stream ?? true,
|
||||
temperature: isThinkingEnabled ? 1 : 0.6,
|
||||
thinking: thinkingParam,
|
||||
tools: moonshotTools?.length ? moonshotTools : undefined,
|
||||
top_p: 0.95,
|
||||
} as any;
|
||||
}
|
||||
|
||||
// Moonshot temperature is normalized by dividing by 2
|
||||
const normalizedTemperature = temperature !== undefined ? temperature / 2 : undefined;
|
||||
|
||||
return {
|
||||
...rest,
|
||||
messages: normalizedMessages,
|
||||
model,
|
||||
stream: payload.stream ?? true,
|
||||
temperature: normalizedTemperature,
|
||||
// Moonshot temperature is normalized by dividing by 2
|
||||
temperature: temperature !== undefined ? temperature / 2 : undefined,
|
||||
tools: moonshotTools?.length ? moonshotTools : undefined,
|
||||
} as OpenAI.ChatCompletionCreateParamsStreaming;
|
||||
};
|
||||
|
||||
@@ -14,7 +14,7 @@ import { selectors } from '../store/selectors';
|
||||
const styles = createStaticStyles(({ css, cssVar }) => ({
|
||||
empty: css`
|
||||
margin-block: 24px;
|
||||
margin-inline: 0;
|
||||
margin-inline: auto;
|
||||
`,
|
||||
questionItemContainer: css`
|
||||
padding-block: 8px;
|
||||
|
||||
Reference in New Issue
Block a user