diff --git a/.vscode/settings.json b/.vscode/settings.json index ba0a285fa8..6a914c5401 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -34,7 +34,7 @@ // make stylelint work with tsx antd-style css template string "typescriptreact" ], - "vitest.maximumConfigs": 10, + "vitest.maximumConfigs": 20, "workbench.editor.customLabels.patterns": { "**/app/**/[[]*[]]/[[]*[]]/page.tsx": "${dirname(2)}/${dirname(1)}/${dirname} • page component", "**/app/**/[[]*[]]/page.tsx": "${dirname(1)}/${dirname} • page component", @@ -81,8 +81,7 @@ "**/src/store/*/slices/*/reducer.ts": "${dirname(2)}/${dirname} • reducer", "**/src/config/modelProviders/*.ts": "${filename} • provider", - "**/src/config/aiModels/*.ts": "${filename} • model", - "**/src/config/paramsSchemas/*/*.json": "${dirname(1)}/${filename} • params", + "**/packages/model-bank/src/aiModels/*.ts": "${filename} • model", "**/packages/model-runtime/src/*/index.ts": "${dirname} • runtime", "**/src/server/services/*/index.ts": "${dirname} • server/service", diff --git a/packages/database/src/models/__tests__/generationBatch.test.ts b/packages/database/src/models/__tests__/generationBatch.test.ts index 45b0b39312..18dd4f8297 100644 --- a/packages/database/src/models/__tests__/generationBatch.test.ts +++ b/packages/database/src/models/__tests__/generationBatch.test.ts @@ -2,7 +2,7 @@ import { eq } from 'drizzle-orm'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { LobeChatDatabase } from '../../type';import { AsyncTaskStatus } from '@/types/asyncTask'; +import { AsyncTaskStatus } from '@/types/asyncTask'; import { GenerationConfig } from '@/types/generation'; import { @@ -12,6 +12,7 @@ import { generations, users, } from '../../schemas'; +import { LobeChatDatabase } from '../../type'; import { GenerationBatchModel } from '../generationBatch'; import { getTestDB } from './_util'; @@ -367,6 +368,51 @@ describe('GenerationBatchModel', () => { }); }); + it('should 
transform single config imageUrl through FileService', async () => { + const [createdBatch] = await serverDB + .insert(generationBatches) + .values({ + ...testBatch, + userId, + config: { imageUrl: 'single-image.jpg', prompt: 'test prompt' }, + }) + .returning(); + + const results = await generationBatchModel.queryGenerationBatchesByTopicIdWithGenerations( + testTopic.id, + ); + + expect(results[0].config).toEqual({ + imageUrl: 'https://example.com/single-image.jpg', + prompt: 'test prompt', + }); + }); + + it('should transform both imageUrl and imageUrls when both are present', async () => { + const [createdBatch] = await serverDB + .insert(generationBatches) + .values({ + ...testBatch, + userId, + config: { + imageUrl: 'single-image.jpg', + imageUrls: ['url1.jpg', 'url2.jpg'], + prompt: 'test prompt', + }, + }) + .returning(); + + const results = await generationBatchModel.queryGenerationBatchesByTopicIdWithGenerations( + testTopic.id, + ); + + expect(results[0].config).toEqual({ + imageUrl: 'https://example.com/single-image.jpg', + imageUrls: ['https://example.com/url1.jpg', 'https://example.com/url2.jpg'], + prompt: 'test prompt', + }); + }); + it('should handle config without imageUrls', async () => { const [createdBatch] = await serverDB .insert(generationBatches) diff --git a/packages/database/src/models/generationBatch.ts b/packages/database/src/models/generationBatch.ts index 68afad6831..c7e82f0672 100644 --- a/packages/database/src/models/generationBatch.ts +++ b/packages/database/src/models/generationBatch.ts @@ -1,7 +1,6 @@ import debug from 'debug'; import { and, eq } from 'drizzle-orm'; -import { LobeChatDatabase } from '../type'; import { FileService } from '@/server/services/file'; import { Generation, GenerationAsset, GenerationBatch, GenerationConfig } from '@/types/generation'; @@ -11,6 +10,7 @@ import { NewGenerationBatch, generationBatches, } from '../schemas/generation'; +import { LobeChatDatabase } from '../type'; import { GenerationModel } 
from './generation'; const log = debug('lobe-image:generation-batch-model'); @@ -121,6 +121,13 @@ export class GenerationBatchModel { // Transform config (async () => { const config = batch.config as GenerationConfig; + + // Handle single imageUrl + if (config.imageUrl) { + config.imageUrl = await this.fileService.getFullFileUrl(config.imageUrl); + } + + // Handle imageUrls array if (Array.isArray(config.imageUrls)) { config.imageUrls = await Promise.all( config.imageUrls.map((url) => this.fileService.getFullFileUrl(url)), diff --git a/packages/model-bank/src/aiModels/aihubmix.ts b/packages/model-bank/src/aiModels/aihubmix.ts index 1e9771348e..1c993a4a11 100644 --- a/packages/model-bank/src/aiModels/aihubmix.ts +++ b/packages/model-bank/src/aiModels/aihubmix.ts @@ -700,7 +700,7 @@ const aihubmixModels: AIChatModelCard[] = [ }, contextWindowTokens: 32_768 + 8192, description: 'Gemini 2.5 Flash 实验模型,支持图像生成', - displayName: 'Gemini 2.5 Flash Image Preview', + displayName: 'Nano Banana', id: 'gemini-2.5-flash-image-preview', maxOutput: 8192, pricing: { diff --git a/packages/model-bank/src/aiModels/google.ts b/packages/model-bank/src/aiModels/google.ts index 5c9a591277..06b9377ab0 100644 --- a/packages/model-bank/src/aiModels/google.ts +++ b/packages/model-bank/src/aiModels/google.ts @@ -196,8 +196,8 @@ const googleChatModels: AIChatModelCard[] = [ }, contextWindowTokens: 32_768 + 8192, description: - 'Gemini 2.5 Flash Image Preview 是 Google 最新、最快、最高效的原生多模态模型,它允许您通过对话生成和编辑图像。', - displayName: 'Gemini 2.5 Flash Image Preview', + 'Nano Banana 是 Google 最新、最快、最高效的原生多模态模型,它允许您通过对话生成和编辑图像。', + displayName: 'Nano Banana', enabled: true, id: 'gemini-2.5-flash-image-preview', maxOutput: 8192, @@ -610,12 +610,12 @@ const imagenBaseParameters: ModelParamsSchema = { /* eslint-disable sort-keys-fix/sort-keys-fix */ const googleImageModels: AIImageModelCard[] = [ { - displayName: 'Gemini 2.5 Flash Image Preview', + displayName: 'Nano Banana', id: 
'gemini-2.5-flash-image-preview:image', enabled: true, type: 'image', description: - 'Gemini 2.5 Flash Image Preview 是 Google 最新、最快、最高效的原生多模态模型,它允许您通过对话生成和编辑图像。', + 'Nano Banana 是 Google 最新、最快、最高效的原生多模态模型,它允许您通过对话生成和编辑图像。', releasedAt: '2025-08-26', parameters: CHAT_MODEL_IMAGE_GENERATION_PARAMS, pricing: { diff --git a/packages/model-bank/src/aiModels/openrouter.ts b/packages/model-bank/src/aiModels/openrouter.ts index e25f85e3de..6ffd708b1b 100644 --- a/packages/model-bank/src/aiModels/openrouter.ts +++ b/packages/model-bank/src/aiModels/openrouter.ts @@ -37,7 +37,7 @@ const openrouterChatModels: AIChatModelCard[] = [ }, contextWindowTokens: 32_768 + 8192, description: 'Gemini 2.5 Flash 实验模型,支持图像生成', - displayName: 'Gemini 2.5 Flash Image Preview', + displayName: 'Nano Banana', id: 'google/gemini-2.5-flash-image-preview', maxOutput: 8192, pricing: { @@ -57,7 +57,7 @@ const openrouterChatModels: AIChatModelCard[] = [ }, contextWindowTokens: 32_768 + 8192, description: 'Gemini 2.5 Flash 实验模型,支持图像生成', - displayName: 'Gemini 2.5 Flash Image Preview (free)', + displayName: 'Nano Banana (free)', id: 'google/gemini-2.5-flash-image-preview:free', maxOutput: 8192, releasedAt: '2025-08-26', diff --git a/packages/model-bank/src/aiModels/vertexai.ts b/packages/model-bank/src/aiModels/vertexai.ts index a8f6841f32..5d7ce95e6c 100644 --- a/packages/model-bank/src/aiModels/vertexai.ts +++ b/packages/model-bank/src/aiModels/vertexai.ts @@ -126,8 +126,8 @@ const vertexaiChatModels: AIChatModelCard[] = [ }, contextWindowTokens: 32_768 + 8192, description: - 'Gemini 2.5 Flash Image Preview 是 Google 最新、最快、最高效的原生多模态模型,它允许您通过对话生成和编辑图像。', - displayName: 'Gemini 2.5 Flash Image Preview', + 'Nano Banana 是 Google 最新、最快、最高效的原生多模态模型,它允许您通过对话生成和编辑图像。', + displayName: 'Nano Banana', enabled: true, id: 'gemini-2.5-flash-image-preview', maxOutput: 8192, diff --git a/packages/model-runtime/src/google/createImage.ts b/packages/model-runtime/src/google/createImage.ts index 3aa2ebc203..4651f45775 
100644 --- a/packages/model-runtime/src/google/createImage.ts +++ b/packages/model-runtime/src/google/createImage.ts @@ -6,6 +6,40 @@ import { parseGoogleErrorMessage } from '../utils/googleErrorParser'; import { imageUrlToBase64 } from '../utils/imageToBase64'; import { parseDataUri } from '../utils/uriParser'; +// Maximum number of images allowed for processing +const MAX_IMAGE_COUNT = 10; + +/** + * Process a single image URL and convert it to Google AI Part format + */ +async function processImageForParts(imageUrl: string): Promise<Part> { + const { mimeType, base64, type } = parseDataUri(imageUrl); + + if (type === 'base64') { + if (!base64) { + throw new TypeError("Image URL doesn't contain base64 data"); + } + + return { + inlineData: { + data: base64, + mimeType: mimeType || 'image/png', + }, + }; + } else if (type === 'url') { + const { base64: urlBase64, mimeType: urlMimeType } = await imageUrlToBase64(imageUrl); + + return { + inlineData: { + data: urlBase64, + mimeType: urlMimeType, + }, + }; + } else { + throw new TypeError(`currently we don't support image url: ${imageUrl}`); + } +} + /** * Extract image data from generateContent response */ @@ -71,36 +105,30 @@ async function generateImageByChatModel( const { model, params } = payload; const actualModel = model.replace(':image', ''); + // Check for conflicting image parameters + if (params.imageUrl && params.imageUrls && params.imageUrls.length > 0) { + throw new TypeError('Cannot provide both imageUrl and imageUrls parameters simultaneously'); + } + // Build content parts const parts: Part[] = [{ text: params.prompt }]; // Add image for editing if provided if (params.imageUrl && params.imageUrl !== null) { - const { mimeType, base64, type } = parseDataUri(params.imageUrl); + const imagePart = await processImageForParts(params.imageUrl); + parts.push(imagePart); + } - if (type === 'base64') { - if (!base64) { - throw new TypeError("Image URL doesn't contain base64 data"); - } - - parts.push({ - inlineData: 
{ - data: base64, - mimeType: mimeType || 'image/png', - }, - }); - } else if (type === 'url') { - const { base64: urlBase64, mimeType: urlMimeType } = await imageUrlToBase64(params.imageUrl); - - parts.push({ - inlineData: { - data: urlBase64, - mimeType: urlMimeType, - }, - }); - } else { - throw new TypeError(`currently we don't support image url: ${params.imageUrl}`); + // Add multiple images for editing if provided + if (params.imageUrls && Array.isArray(params.imageUrls) && params.imageUrls.length > 0) { + if (params.imageUrls.length > MAX_IMAGE_COUNT) { + throw new TypeError(`Too many images provided. Maximum ${MAX_IMAGE_COUNT} images allowed`); } + + const imageParts = await Promise.all( + params.imageUrls.map((imageUrl) => processImageForParts(imageUrl)), + ); + parts.push(...imageParts); } const contents: Content[] = [