Fix prompt format for image description in prompt array (#1516)

* Fix prompt format for image description in prompt array
* [Bug] Fix syntax error (#1515)
* Fix syntax error
* Fix prompt format for audio transcription in session.promptStreaming
* Fix formatting

---------

Co-authored-by: Sebastian Benz <sbenz@google.com>
2026-03-26 13:19:49 +07:00 · 2025-07-29 14:42:39 +08:00
parent eca778c3c6
commit 5f6f02b924
2 changed files with 11 additions and 2 deletions
--- a/functional-samples/ai.gemini-on-device-alt-texter/background.js
+++ b/functional-samples/ai.gemini-on-device-alt-texter/background.js
@@ -20,8 +20,16 @@ async function generateAltText(imgSrc) {

  // Run the prompt
  const prompt = [
-    `Please provide a functional, objective description of the provided image in no more than around 30 words so that someone who could not see it would be able to imagine it. If possible, follow an “object-action-context” framework. The object is the main focus. The action describes what’s happening, usually what the object is doing. The context describes the surrounding environment. If there is text found in the image, do your best to transcribe the important bits, even if it extends the word count beyond 30 words. It should not contain quotation marks, as those tend to cause issues when rendered on the web. If there is no text found in the image, then there is no need to mention it. You should not begin the description with any variation of “The image”.`,
-    { type: 'image', content: imageBitmap }
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          value: `Please provide a functional, objective description of the provided image in no more than around 30 words so that someone who could not see it would be able to imagine it. If possible, follow an “object-action-context” framework. The object is the main focus. The action describes what’s happening, usually what the object is doing. The context describes the surrounding environment. If there is text found in the image, do your best to transcribe the important bits, even if it extends the word count beyond 30 words. It should not contain quotation marks, as those tend to cause issues when rendered on the web. If there is no text found in the image, then there is no need to mention it. You should not begin the description with any variation of “The image”.`
+        },
+        { type: 'image', value: imageBitmap }
+      ]
+    }
  ];
  return await session.prompt(prompt);
 }
--- a/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.js
+++ b/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.js
@@ -46,6 +46,7 @@ chrome.runtime.onMessage.addListener(async ({ data }) => {
    const session = await LanguageModel.create({
      expectedInputs: [{ type: 'audio' }]
    });
+
    const stream = session.promptStreaming([
      {
        role: 'user',