diff --git a/.gitignore b/.gitignore index 94d6171d..a9ad51c6 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ node_modules _debug _metadata dist -*.swp # vim temp files +**/*.swp diff --git a/functional-samples/ai.gemini-on-device-alt-texter/README.md b/functional-samples/ai.gemini-on-device-alt-texter/README.md index 4d650a8b..bbd1a645 100644 --- a/functional-samples/ai.gemini-on-device-alt-texter/README.md +++ b/functional-samples/ai.gemini-on-device-alt-texter/README.md @@ -1,4 +1,4 @@ -# On-device multimodal AI with Gemini Nano - image understanding +# Alt-texter: On-device multimodal AI with Gemini Nano - image understanding This sample demonstrates how to use the image understanding capabilities of the multi-modal Gemini Nano API preview together with [Chrome's translation API](https://developer.chrome.com/docs/ai/translator-api). To learn more about the API and how to sign-up for the origin trial, head over to [Built-in AI on developer.chrome.com](https://developer.chrome.com/docs/extensions/ai/prompt-api). diff --git a/functional-samples/ai.gemini-on-device-alt-texter/background.js b/functional-samples/ai.gemini-on-device-alt-texter/background.js index 8ffe8fb5..4781a7e3 100644 --- a/functional-samples/ai.gemini-on-device-alt-texter/background.js +++ b/functional-samples/ai.gemini-on-device-alt-texter/background.js @@ -35,7 +35,7 @@ chrome.contextMenus.onClicked.addListener(async (info, tab) => { ]); chrome.runtime.sendMessage({ action: 'alt-text', - text: result.value === 'fulfilled' ? result.value : result.reason.message + text: result.status === 'fulfilled' ? 
result.value : result.reason.message }); } }); diff --git a/functional-samples/ai.gemini-on-device-alt-texter/manifest.json b/functional-samples/ai.gemini-on-device-alt-texter/manifest.json index bee63d67..8ce90835 100644 --- a/functional-samples/ai.gemini-on-device-alt-texter/manifest.json +++ b/functional-samples/ai.gemini-on-device-alt-texter/manifest.json @@ -2,7 +2,7 @@ "manifest_version": 3, "name": "Alt Texter", "version": "1.0", - "description": "Generates alt text for images using the Prompt API.", + "description": "Generates alt text for images using the Gemini Nano Prompt API.", "permissions": ["contextMenus", "clipboardWrite"], "host_permissions": ["<all_urls>"], "minimum_chrome_version": "138", diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/README.md b/functional-samples/ai.gemini-on-device-audio-scribe/README.md new file mode 100644 index 00000000..7b055c43 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/README.md @@ -0,0 +1,20 @@ +# Audio-Scribe: On-device multimodal AI with Gemini Nano - audio transcription + +This sample demonstrates how to use the audio transcription capabilities of the multi-modal Gemini Nano API preview. To learn more about the API and how to sign up for the origin trial, head over to [Built-in AI on developer.chrome.com](https://developer.chrome.com/docs/extensions/ai/prompt-api). + +## Overview + +This extension adds a sidepanel that will, when opened, display a transcription of all audio files on a web page (currently it looks only for audio files created using `URL.createObjectURL`). + +## Running this extension + +1. Clone this repository. +1. Load this directory in Chrome as an [unpacked extension](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked). +1. Open the audio-scribe sidepanel by clicking the audio-scribe action or by pressing the `ALT + A` keyboard shortcut. +1. Open a chat app in the browser, for example https://web.whatsapp.com/. 
You can also run the demo chat app via: + ``` + npx serve demo-chat-app + ``` +1. All audio messages in the current chat will be transcribed in the side panel. + +![Screenshot displaying a demo chat app with a few audio messages. On the right, there is the audio-scribe extension's sidepanel which displays the transcribed text messages](assets/screenshot.png) diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/assets/icon128.png b/functional-samples/ai.gemini-on-device-audio-scribe/assets/icon128.png new file mode 100644 index 00000000..35ac3893 Binary files /dev/null and b/functional-samples/ai.gemini-on-device-audio-scribe/assets/icon128.png differ diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/assets/screenshot.png b/functional-samples/ai.gemini-on-device-audio-scribe/assets/screenshot.png new file mode 100644 index 00000000..bbc9f789 Binary files /dev/null and b/functional-samples/ai.gemini-on-device-audio-scribe/assets/screenshot.png differ diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/background.js b/functional-samples/ai.gemini-on-device-audio-scribe/background.js new file mode 100644 index 00000000..bdb08b06 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/background.js @@ -0,0 +1,15 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }); diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/bridge.js b/functional-samples/ai.gemini-on-device-audio-scribe/bridge.js new file mode 100644 index 00000000..b2c89e34 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/bridge.js @@ -0,0 +1,23 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Forward messages from the content script in the MAIN world to the +// side panel. Only messages posted by this window itself are accepted. +window.addEventListener('message', ({ data, source }) => { + if (source !== window || data?.type !== 'audio-scribe') { + return; + } + // The side panel may be closed, so a missing receiver is expected + chrome.runtime.sendMessage({ data }).catch(() => {}); +}); diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/index.html b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/index.html new file mode 100644 index 00000000..5e35efd4 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/index.html @@ -0,0 +1,52 @@ + + + + + + Chat App Demo + + + +
+ +
+
+ + 😊 +

Alice

+
+
+ +
+
+ + +
+
+
+ + + + diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/intro.mp3 b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/intro.mp3 new file mode 100644 index 00000000..fa5250c9 Binary files /dev/null and b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/intro.mp3 differ diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/msg1.mp3 b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/msg1.mp3 new file mode 100644 index 00000000..46857e4d Binary files /dev/null and b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/msg1.mp3 differ diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/script.js b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/script.js new file mode 100644 index 00000000..ca1888a4 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/script.js @@ -0,0 +1,224 @@ +document.addEventListener('DOMContentLoaded', () => { + const messageList = document.getElementById('message-list'); + + const sampleMessages = [ + { type: 'received', text: '', isAudio: true, audioSrc: 'intro.mp3' }, + { type: 'sent', text: "I'm in a meeting right now" }, + { type: 'received', text: '', isAudio: true, audioSrc: 'msg1.mp3' }, + { type: 'sent', text: '🙄' } + ]; + + // Renders the sample messages one by one, delayIncrement ms apart + async function renderMessages() { + messageList.innerHTML = ''; // Clear existing messages + const delayIncrement = 3000; // 3 seconds in milliseconds + + await timeout(3000); + + sampleMessages.forEach((msg, index) => { + // Use setTimeout to delay the appearance of each message + setTimeout(async () => { + const messageElement = document.createElement('div'); + messageElement.classList.add('message', msg.type); + + // Add a class for fade-in animation (optional, but nice) + messageElement.style.opacity = '0'; // Start transparent + messageElement.style.transition = 'opacity 0.5s 
ease-in-out'; + + if (msg.isAudio && msg.audioSrc) { + messageElement.classList.add('audio'); + + // Create audio element (hidden controls) + const audioElement = document.createElement('audio'); + audioElement.preload = 'metadata'; // Important for getting duration + const sourceElement = document.createElement('source'); + sourceElement.type = 'audio/mpeg'; // Assuming MP3 + audioElement.appendChild(sourceElement); + + const response = await fetch(msg.audioSrc); + const data = await response.arrayBuffer(); + const blob = new Blob([data], { type: 'audio/wav' }); + sourceElement.src = URL.createObjectURL(blob); + // Keep the audio element in the DOM but hidden for playback logic + audioElement.style.display = 'none'; + messageElement.appendChild(audioElement); + + // Create custom controls container + const controlsContainer = document.createElement('div'); + controlsContainer.classList.add('audio-controls'); + + // Play/Pause Button + const playPauseButton = document.createElement('button'); + playPauseButton.classList.add('audio-play-pause'); + playPauseButton.textContent = '▶'; // Play icon initially + controlsContainer.appendChild(playPauseButton); + + // Progress Bar (Slider) + const progressBar = document.createElement('input'); + progressBar.type = 'range'; + progressBar.classList.add('audio-progress'); + progressBar.value = 0; + progressBar.min = 0; + progressBar.max = 100; // Will be updated with duration + progressBar.step = 0.1; + controlsContainer.appendChild(progressBar); + + // Duration Display + const durationDisplay = document.createElement('span'); + durationDisplay.classList.add('audio-duration'); + durationDisplay.textContent = '0:00'; // Initial display + controlsContainer.appendChild(durationDisplay); + + // Append custom controls to the message element + messageElement.appendChild(controlsContainer); + + // --- Event Listeners for Custom Controls --- + + // Format time helper function + function formatTime(seconds) { + const minutes = 
Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + return `${minutes}:${secs < 10 ? '0' : ''}${secs}`; + } + + // Update duration when metadata loads + audioElement.addEventListener('loadedmetadata', () => { + progressBar.max = audioElement.duration; + durationDisplay.textContent = formatTime(audioElement.duration); + }); + + // Play/Pause functionality + playPauseButton.addEventListener('click', () => { + if (audioElement.paused) { + audioElement.play(); + playPauseButton.textContent = '❚❚'; // Pause icon + } else { + audioElement.pause(); + playPauseButton.textContent = '▶'; // Play icon + } + }); + + // Update progress bar as audio plays + audioElement.addEventListener('timeupdate', () => { + progressBar.value = audioElement.currentTime; + // Update duration display to show current time while playing (optional) + durationDisplay.textContent = `${formatTime(audioElement.currentTime)} / ${formatTime(audioElement.duration)}`; + }); + + // Seek audio when progress bar is changed + progressBar.addEventListener('input', () => { + audioElement.currentTime = progressBar.value; + }); + + // Reset button to play when audio ends + audioElement.addEventListener('ended', () => { + playPauseButton.textContent = '▶'; + progressBar.value = 0; // Reset progress bar + }); + } else { + messageElement.textContent = msg.text; + // Check if the message is emoji-only + if (isEmojiOnly(msg.text)) { + messageElement.classList.add('message-emoji-only'); + } + } + + messageList.appendChild(messageElement); + + // Trigger the fade-in effect + requestAnimationFrame(() => { + // Ensures the element is in the DOM before changing opacity + messageElement.style.opacity = '1'; + }); + + // Scroll to the bottom after adding the message + messageList.scrollTop = messageList.scrollHeight; + }, index * delayIncrement); // Stagger delay based on index + }); + } + + // Helper function to check if a string contains only emojis + function isEmojiOnly(str) { + // Regex to match one or 
more emojis and nothing else + const emojiRegex = /^(\p{Emoji_Presentation}|\p{Extended_Pictographic})+$/u; + return emojiRegex.test(str.trim()); + } + + renderMessages(); + + // Basic send functionality (optional, just for demo) + const sendButton = document.getElementById('send-button'); // Use ID selector + const messageInput = document.getElementById('message-input-field'); // Use ID selector + + function sendMessage() { + const text = messageInput.value.trim(); + if (text) { + // No need to re-render everything, just add the new message + const newMessage = { type: 'sent', text: text }; + sampleMessages.push(newMessage); // Add to data source + + // Create and append the new message element directly + const messageElement = document.createElement('div'); + messageElement.classList.add('message', 'sent'); + messageElement.textContent = text; + + // Check if the new message is emoji-only + if (isEmojiOnly(text)) { + messageElement.classList.add('message-emoji-only'); + } + + // Add fade-in effect (optional, consistent with renderMessages) + messageElement.style.opacity = '0'; + messageElement.style.transition = 'opacity 0.5s ease-in-out'; + + messageList.appendChild(messageElement); + + // Trigger fade-in + requestAnimationFrame(() => { + messageElement.style.opacity = '1'; + }); + + // Scroll to bottom + messageList.scrollTop = messageList.scrollHeight; + + messageInput.value = ''; // Clear input + } + } + + sendButton.addEventListener('click', sendMessage); + messageInput.addEventListener('keypress', (e) => { + if (e.key === 'Enter') { + sendMessage(); + } + }); + + // Chat switching functionality + const chatList = document.getElementById('chat-list'); + const chatItems = chatList.querySelectorAll('.chat-item'); + const currentChatName = document.getElementById('current-chat-name'); + const currentChatAvatar = document.getElementById('current-chat-avatar'); + + chatItems.forEach((item) => { + item.addEventListener('click', () => { + // Remove active class 
from previously active item + const currentActive = chatList.querySelector('.chat-item.active'); + if (currentActive) { + currentActive.classList.remove('active'); + } + + // Add active class to clicked item + item.classList.add('active'); + + // Update chat header + const chatName = item.querySelector('span:not(.avatar)').textContent; // Get name span specifically + const avatarEmoji = item.querySelector('.avatar').textContent; // Get emoji from avatar span + + currentChatName.textContent = chatName; + currentChatAvatar.textContent = avatarEmoji; // Set emoji in header avatar span + }); + }); +}); + +function timeout(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/style.css b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/style.css new file mode 100644 index 00000000..c5d04ab5 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/demo-chat-app/style.css @@ -0,0 +1,280 @@ +/* General Reset and Body Styles */ +body { + font-family: 'Segoe UI', 'Helvetica Neue', Helvetica, Arial, sans-serif; + margin: 0; + background-color: #f0f4f8; /* Lighter, cooler background */ + height: 100vh; + display: flex; + overflow: hidden; /* Prevent body scroll */ +} + +/* Main App Container */ +.app-container { + display: flex; + width: 100%; + height: 100%; + max-width: 1600px; + margin: auto; + background-color: #ffffff; /* Keep main container white */ + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1), 0 2px 8px rgba(0, 0, 0, 0.08); /* Slightly softer shadow */ +} + +/* Sidebar (Chat List) */ +.sidebar { + width: 30%; + min-width: 250px; + max-width: 400px; + background-color: #ffffff; + border-right: 1px solid #e8e8e8; /* Lighter separator */ + display: flex; + flex-direction: column; +} + +.sidebar h2 { + padding: 15px 20px; + margin: 0; + font-size: 18px; + font-weight: 500; + color: #333; /* Standard dark grey */ + border-bottom: 1px solid #e8e8e8; /* 
Lighter separator */ + background-color: #f8f9fa; /* Very light grey header */ +} + +#chat-list { + list-style: none; + padding: 0; + margin: 0; + overflow-y: auto; /* Allow scrolling for chat list */ + flex-grow: 1; +} + +/* Style for avatar emojis */ +.avatar { + width: 40px; + height: 40px; + border-radius: 50%; /* Make avatars circular */ + margin-right: 10px; /* Space between avatar and name */ + display: flex; /* Use flexbox for centering */ + align-items: center; + justify-content: center; + font-size: 24px; /* Adjust emoji size */ + background-color: #eee; /* Optional: Add a background */ + flex-shrink: 0; /* Prevent shrinking */ +} + +.chat-item { + padding: 10px 15px; /* Slightly less padding */ + border-bottom: 1px solid #f0f0f0; /* Lighter separator */ + cursor: pointer; + display: flex; + align-items: center; + /* gap: 15px; */ /* Replaced by avatar margin */ +} + +.chat-item:hover { + background-color: #f0f4f8; /* Light blue hover */ +} + +.chat-item.active { + background-color: #e2eaf1; /* Slightly darker blue for active */ +} + +/* Chat Panel (Conversation View) */ +.chat-panel { + flex-grow: 1; + display: flex; + flex-direction: column; + background-color: #e8f0f4; /* Lighter blue chat background */ +} + +.chat-header { + padding: 10px 16px; + background-color: #f8f9fa; /* Match sidebar header */ + border-bottom: 1px solid #e8e8e8; /* Lighter separator */ + display: flex; + align-items: center; + min-height: 40px; +} + +/* No specific override needed for header avatar anymore if base style is good */ +/* .chat-header .avatar { ... 
} */ /* Removed redundant/conflicting styles */ + +.chat-header h3 { + margin: 0; + font-size: 16px; + font-weight: 500; + color: #333; /* Standard dark grey */ +} + +/* Message List */ +.message-list { + flex-grow: 1; + padding: 20px 5%; /* Padding relative to width */ + overflow-y: auto; + display: flex; + flex-direction: column; + gap: 5px; /* Smaller gap between messages */ +} + +/* Individual Messages */ +.message { + padding: 6px 12px; + border-radius: 7.5px; /* WhatsApp's bubble radius */ + max-width: 65%; + word-wrap: break-word; + box-shadow: 0 1px 1px rgba(0, 0, 0, 0.08); /* Softer shadow */ + font-size: 14.5px; + line-height: 1.4; +} + +.message.sent { + background-color: #cce5ff; /* Light blue for sent messages */ + color: #004085; /* Darker blue text for contrast */ + align-self: flex-end; + margin-left: auto; +} + +.message.received { + background-color: #ffffff; /* White for received */ + color: #333; /* Standard dark text */ + align-self: flex-start; + margin-right: auto; +} + +/* Style for messages containing only emojis */ +.message.message-emoji-only { + font-size: 29px; /* Double the base message font size */ + line-height: 1.2; /* Adjust line height for larger font */ + padding: 2px 6px; /* Adjust padding slightly */ + /* Optional: Remove background for pure emoji look */ + /* background-color: transparent; */ + /* box-shadow: none; */ +} + +/* Message Input Area */ +.message-input { + display: flex; + align-items: center; + padding: 10px 20px; + background-color: #f8f9fa; /* Match header background */ + border-top: 1px solid #e8e8e8; /* Lighter separator */ +} + +#message-input-field { /* Use the ID from HTML */ + flex-grow: 1; + padding: 10px 15px; + border: none; /* Remove default border */ + border-radius: 20px; /* Rounded input */ + margin-right: 10px; + font-size: 15px; + outline: none; +} + +#send-button { + padding: 10px 15px; + background-color: #007bff; /* Standard blue send button */ + color: white; + border: none; + border-radius: 
20px; /* Rounded corners like input */ + cursor: pointer; + font-size: 15px; /* Match input field font size */ + /* Remove fixed width and height */ + display: flex; + justify-content: center; + align-items: center; + /* Consider adding an SVG icon here */ +} + +#send-button:hover { + background-color: #0056b3; /* Darker blue on hover */ +} + +/* Custom Audio Player Styles */ +.message.audio { + /* Adjust padding if needed for controls */ + padding-top: 8px; + padding-bottom: 8px; +} + +.audio-controls { + display: flex; + align-items: center; + gap: 10px; /* Space between button, progress, duration */ + width: 100%; /* Ensure controls take available width */ + margin-top: 5px; /* Space above controls if there was text */ +} + +.audio-play-pause { + background-color: #007bff; /* Match send button color */ + color: white; + border: none; + border-radius: 50%; /* Circular button */ + width: 30px; + height: 30px; + font-size: 14px; /* Adjust icon size */ + cursor: pointer; + display: flex; + justify-content: center; + align-items: center; + padding: 0; /* Remove default padding */ + flex-shrink: 0; /* Prevent button from shrinking */ +} + +.audio-play-pause:hover { + background-color: #0056b3; /* Darker blue on hover */ +} + +.audio-progress { + flex-grow: 1; /* Take up remaining space */ + height: 6px; /* Slimmer progress bar */ + cursor: pointer; + appearance: none; /* Override default look */ + background: #ddd; /* Track background */ + border-radius: 3px; + outline: none; +} + +/* Styling the progress bar thumb (the draggable part) */ +.audio-progress::-webkit-slider-thumb { + appearance: none; + width: 12px; + height: 12px; + background: #007bff; /* Thumb color */ + border-radius: 50%; + cursor: pointer; + margin-top: -3px; /* Adjust vertical alignment ( (track_height - thumb_height) / 2 ) */ +} + +.audio-progress::-moz-range-thumb { + width: 12px; + height: 12px; + background: #007bff; + border-radius: 50%; + cursor: pointer; + border: none; /* Remove default 
border in Firefox */ +} + +/* Styling the progress bar track */ +.audio-progress::-webkit-slider-runnable-track { + height: 6px; + background: #ccc; /* Slightly darker track */ + border-radius: 3px; +} + +.audio-progress::-moz-range-track { + height: 6px; + background: #ccc; + border-radius: 3px; +} + +.audio-duration { + font-size: 12px; + color: #555; /* Muted color for duration */ + min-width: 35px; /* Ensure space for MM:SS */ + text-align: right; + flex-shrink: 0; /* Prevent duration from shrinking */ +} + +#chat { + background-color: red; +} diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/manifest.json b/functional-samples/ai.gemini-on-device-audio-scribe/manifest.json new file mode 100644 index 00000000..16e109ec --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/manifest.json @@ -0,0 +1,40 @@ +{ + "name": "Audio scribe", + "description": "Use Gemini Nano to transcribe audio messages in chat conversations.", + "version": "0.2", + "icons": { + "128": "assets/icon128.png" + }, + "background": { + "service_worker": "background.js" + }, + "content_scripts": [ + { + "matches": ["<all_urls>"], + "js": ["override-createobject-url.js"], + "run_at": "document_start", + "world": "MAIN" + }, + { + "matches": ["<all_urls>"], + "js": ["bridge.js"], + "run_at": "document_start" + } + ], + "action": {}, + "side_panel": { + "default_path": "sidepanel.html" + }, + "permissions": ["sidePanel"], + "commands": { + "_execute_action": { + "suggested_key": { + "windows": "Alt+A", + "mac": "Alt+A", + "chromeos": "Alt+A", + "linux": "Alt+A" + } + } + }, + "manifest_version": 3 +} diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/override-createobject-url.js b/functional-samples/ai.gemini-on-device-audio-scribe/override-createobject-url.js new file mode 100644 index 00000000..6218062e --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/override-createobject-url.js @@ -0,0 +1,24 @@ +// Copyright 2025 Google LLC +// +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +const originalCreateObjectURL = URL.createObjectURL; + +// Signal the creation of object URLs to the side panel +// Note: you should be only doing this for specific websites +// and not for all (as we do in this demo) +URL.createObjectURL = (object) => { + const objectUrl = originalCreateObjectURL.call(URL, object); + window.postMessage({ type: 'audio-scribe', objectUrl }); + return objectUrl; +}; diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.html b/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.html new file mode 100644 index 00000000..62b8d485 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.html @@ -0,0 +1,47 @@ + + + + + + +
    + + + diff --git a/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.js b/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.js new file mode 100644 index 00000000..60118c00 --- /dev/null +++ b/functional-samples/ai.gemini-on-device-audio-scribe/sidepanel.js @@ -0,0 +1,88 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* global LanguageModel */ + +// Transcribes audio object URLs announced by the page (via bridge.js) and +// streams the transcription into the side panel's message list. +chrome.runtime.onMessage.addListener(async (message) => { + const data = message?.data; + // Validate the message before touching any of its properties + if (!data || data.type !== 'audio-scribe' || !isValidUrl(data.objectUrl)) { + return; + } + + let content; + try { + // Fetch the data behind the object URL + const audio = await fetch(data.objectUrl); + content = await audio.blob(); + } catch (e) { + // Without the audio data there is nothing to transcribe + console.log(e); + return; + } + // Check if it's an audio file + if (!content.type || !content.type.startsWith('audio/')) { + return; + } + + // Setup message UI + const messages = document.getElementById('messages'); + const li = document.createElement('li'); + li.append('...'); + messages.append(li); + + let session; + try { + // Transcribe audio + const availability = await LanguageModel.availability(); + if (availability !== 'available') { + console.error('Model is', availability); + throw new Error('Model is not available'); + } + session = await LanguageModel.create({ + expectedInputs: [{ type: 'audio' }] + }); + const stream = session.promptStreaming([ + { type: 'audio', content }, + 'transcribe this audio' + ]); + + // Render streamed response + let first = true; + for await (const chunk of stream) 
{ + if (first) { + li.textContent = ''; + first = false; + } + li.append(chunk); + } + } catch (error) { + console.log(error); + li.textContent = error.message; + } finally { + // Release the model session once the transcription is done + session?.destroy(); + } +}); + +// Returns true if `string` parses as an absolute URL +function isValidUrl(string) { + try { + return Boolean(new URL(string)); + } catch (_) { + return false; + } +} diff --git a/functional-samples/ai.gemini-on-device-calendar-mate/manifest.json b/functional-samples/ai.gemini-on-device-calendar-mate/manifest.json index cb338c9d..eb2ff761 100644 --- a/functional-samples/ai.gemini-on-device-calendar-mate/manifest.json +++ b/functional-samples/ai.gemini-on-device-calendar-mate/manifest.json @@ -2,7 +2,7 @@ "manifest_version": 3, "name": "Calendar Mate", "version": "1.0", - "description": "Create Google Calendar events from selected text.", + "description": "Create Google Calendar events from selected text using Gemini Nano.", "permissions": ["contextMenus"], "minimum_chrome_version": "138", "background": { diff --git a/functional-samples/ai.gemini-on-device/manifest.json b/functional-samples/ai.gemini-on-device/manifest.json index c309548c..76c8fb74 100644 --- a/functional-samples/ai.gemini-on-device/manifest.json +++ b/functional-samples/ai.gemini-on-device/manifest.json @@ -2,7 +2,7 @@ "name": "Chrome Prompt AI Demo", "version": "0.2", "manifest_version": 3, - "description": "Try Chrome's built-in prompt API.", + "description": "Try Chrome's built-in prompt API built with Gemini Nano.", "background": { "service_worker": "background.js" },