🐛 fix(stream): update event handling to use 'text' instead of 'content_part' in gemini 2.5 models (#11235)

🐛 fix(stream): update event handling to use 'text' instead of 'content_part' in Google AI stream
This commit is contained in:
sxjeru
2026-01-31 22:43:18 +08:00
committed by GitHub
parent 338df4baf9
commit a76a630f28
2 changed files with 55 additions and 17 deletions

View File

@@ -251,16 +251,16 @@ describe('GoogleGenerativeAIStream', () => {
expect(chunks).toEqual(
[
'id: chat_1',
'event: content_part',
'data: {"content":"234","partType":"text"}\n',
'event: text',
'data: "234"\n',
'id: chat_1',
'event: text',
'data: ""\n',
'id: chat_1',
'event: content_part',
`data: {"content":"567890\\n","partType":"text"}\n`,
'event: text',
`data: "567890\\n"\n`,
// stop
'id: chat_1',
'event: stop',
@@ -384,12 +384,12 @@ describe('GoogleGenerativeAIStream', () => {
`data: {"content":"**Finalizing Interpretation**\\n\\n","inReasoning":true,"partType":"text"}\n`,
'id: chat_1',
'event: content_part',
`data: {"content":"简单来说,","partType":"text"}\n`,
'event: text',
'data: "简单来说,"\n',
'id: chat_1',
'event: content_part',
`data: {"content":"文本内容。","partType":"text"}\n`,
'event: text',
'data: "文本内容。"\n',
// stop
'id: chat_1',
'event: stop',
@@ -471,12 +471,12 @@ describe('GoogleGenerativeAIStream', () => {
expect(chunks).toEqual(
[
'id: chat_1',
'event: content_part',
'data: {"content":"234","partType":"text"}\n',
'event: text',
'data: "234"\n',
'id: chat_1',
'event: content_part',
`data: {"content":"567890\\n","partType":"text"}\n`,
'event: text',
'data: "567890\\n"\n',
// stop
'id: chat_1',
'event: stop',
@@ -1103,7 +1103,7 @@ describe('GoogleGenerativeAIStream', () => {
content: {
parts: [
{
text: '**Planning the Solution**\n\nI\'m solidifying my plan...',
text: "**Planning the Solution**\n\nI'm solidifying my plan...",
thought: true,
},
],
@@ -1901,5 +1901,46 @@ describe('GoogleGenerativeAIStream', () => {
].map((i) => i + '\n'),
);
});
it('should NOT use multimodal processing if only thoughtsTokenCount is present in metadata but no thought parts', async () => {
  // Stub nanoid so it returns '1' for the single id requested during this test.
  vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');

  // One plain-text response chunk: usageMetadata reports thoughtsTokenCount > 0,
  // but the only part is ordinary text (no `thought: true`, no image, no signature).
  const responseChunks = [
    {
      candidates: [
        {
          content: {
            parts: [{ text: 'Hello world' }],
            role: 'model',
          },
          index: 0,
        },
      ],
      usageMetadata: {
        promptTokenCount: 10,
        candidatesTokenCount: 2,
        totalTokenCount: 17,
        thoughtsTokenCount: 5,
      },
      modelVersion: 'gemini-2.5-flash',
    },
  ];

  // Feed every prepared chunk into an in-memory stream, then close it.
  const sourceStream = new ReadableStream({
    start: (controller) => {
      for (const payload of responseChunks) {
        controller.enqueue(payload);
      }
      controller.close();
    },
  });

  const emitted = await decodeStreamChunks(GoogleGenerativeAIStream(sourceStream));

  // The text must be emitted through the 'text' event and never through 'content_part'.
  expect(emitted).toContain('event: text\n');
  expect(emitted).not.toContain('event: content_part\n');
});
});
});

View File

@@ -120,7 +120,6 @@ const transformGoogleGenerativeAIStream = (
const hasReasoningParts = parts.some((p: any) => p.thought === true);
const hasImageParts = parts.some((p: any) => p.inlineData);
const hasThoughtSignature = parts.some((p: any) => p.thoughtSignature);
const hasThoughtsInMetadata = (usageMetadata as any)?.thoughtsTokenCount > 0;
// Check model version to determine if new format should be used
const modelVersion = (chunk as any).modelVersion || '';
@@ -144,8 +143,7 @@ const transformGoogleGenerativeAIStream = (
// 1. There are reasoning parts in current chunk (thought: true)
// 2. There are multiple parts with images (multimodal content)
// 3. At least one part carries a thoughtSignature (reasoning metadata attached to content)
// 4. There is thoughtsTokenCount in metadata (indicates response contains reasoning)
// 5. This is Gemini 3 model with image generation (always use new format for consistency)
// 4. This is a Gemini 3 model with image generation (always use new format for consistency)
// BUT NOT for:
// - The legacy single-image scenario
// - Grounding metadata scenario (uses legacy text + grounding events)
@@ -153,7 +151,6 @@ const transformGoogleGenerativeAIStream = (
(hasReasoningParts ||
(hasImageParts && parts.length > 1) ||
hasThoughtSignature ||
hasThoughtsInMetadata ||
isGemini3Model) &&
!isSingleImageWithFinish &&
!hasGroundingMetadata;