refactor: use genkit with dataUrl
cabljac committed Nov 28, 2024
1 parent 50b7e30 commit 0353027
Showing 2 changed files with 14 additions and 21 deletions.
firestore-multimodal-genai/POSTINSTALL.md (6 changes: 4 additions & 2 deletions)
```diff
@@ -40,11 +40,13 @@ For Vertex AI, the list of models is [here](https://cloud.google.com/vertex-ai/d
 
 #### Multimodal Prompts
 
-The Gemini Pro Vision model accepts multimodal prompts. This extension allows for multimodal prompting using this model.
+Many of the Gemini models accept multimodal prompts. This extension allows for multimodal prompting with images using this model.
 
 On installation you may pick an `image` field. The image field must be the Cloud Storage URL of an object (e.g `gs://my-bucket.appspot.com/filename.png`). This image will then be provided as part of the prompt to Gemini Pro Vision.
 
-Note that Google AI requires prompts to have both an image and text part, whereas Vertex AI allows gemini-pro-vision to be prompted with text only as well. If you have selected to use the Gemini Pro Vision model and have Google AI as a provider then any document handled by the extension must contain an image field.
+Note that Google AI requires prompts to have both an image and text part, whereas Vertex AI allows gemini-pro-vision to be prompted with text only as well.
+
+If you have selected to use the Gemini Pro Vision model (deprecated) and have Google AI as a provider then any document handled by the extension must contain an image field.
 
 The Gemini Pro Vision API has a limit on image sizes. For Google AI this limit is currently 1MB, and for Vertex AI this limit is 4MB. This extension compress and resize images that fall above this limit.
```

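The per-provider size limits described in the POSTINSTALL text can be sketched as a small pre-check. This is a minimal illustration, not code from the extension: the helper name, the `Provider` type, and the choice of binary (MiB) rather than decimal megabytes are all assumptions.

```typescript
// Hypothetical pre-check for the image size limits described above.
// The text states 1MB for Google AI and 4MB for Vertex AI; whether the
// limit is decimal MB or binary MiB is an assumption here.
type Provider = 'google-ai' | 'vertex-ai';

const IMAGE_SIZE_LIMITS: Record<Provider, number> = {
  'google-ai': 1 * 1024 * 1024,
  'vertex-ai': 4 * 1024 * 1024,
};

// Returns true when the image would need to be compressed/resized
// before being sent as part of the prompt.
function exceedsImageLimit(imageBytes: number, provider: Provider): boolean {
  return imageBytes > IMAGE_SIZE_LIMITS[provider];
}

console.log(exceedsImageLimit(2 * 1024 * 1024, 'google-ai')); // true
console.log(exceedsImageLimit(2 * 1024 * 1024, 'vertex-ai')); // false
```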
```diff
@@ -45,6 +45,7 @@ export class GenkitGenerativeClient extends GenerativeClient<
 
   // We use this to check before creating the client to see if we should use the Genkit client
   static shouldUseGenkitClient(config: Config): boolean {
+    if (config.model.includes('pro-vision')) return false;
     const shouldReturnMultipleCandidates =
       config.candidates.shouldIncludeCandidatesField;
     return (
```
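The guard added above short-circuits Genkit client selection for the deprecated pro-vision models. A standalone sketch follows; the `Config` type here is a minimal stand-in, and since the diff truncates the function at `return (`, the rest of the return expression is an assumption.

```typescript
// Minimal stand-in for the extension's Config type; only the fields
// this guard reads are included (hypothetical, for illustration).
type Config = {
  model: string;
  candidates: {shouldIncludeCandidatesField: boolean};
};

// Mirrors the guard added in the diff above. The full return expression
// is elided in the diff, so this sketch assumes the Genkit client is
// used only when multiple candidates are not requested.
function shouldUseGenkitClient(config: Config): boolean {
  if (config.model.includes('pro-vision')) return false;
  const shouldReturnMultipleCandidates =
    config.candidates.shouldIncludeCandidatesField;
  return !shouldReturnMultipleCandidates;
}

console.log(
  shouldUseGenkitClient({
    model: 'gemini-pro-vision',
    candidates: {shouldIncludeCandidatesField: false},
  }),
); // false
```

The static guard keeps the deprecated-model branch out of the Genkit code path entirely, so the legacy client handles pro-vision requests unchanged.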
```diff
@@ -157,30 +158,20 @@ export class GenkitGenerativeClient extends GenerativeClient<
       }
     }
 
-    const messages: MessageData[] = [
-      {
-        role: 'user',
-        content: [{text: promptText}],
-      },
-    ];
+    const message: MessageData = {
+      role: 'user',
+      content: [{text: promptText}], // Initialize with the prompt text
+    };
 
     if (imageBase64) {
-      messages.push({
-        role: 'user',
-        content: [
-          {
-            text: 'Attached image:',
-          },
-          {
-            data: imageBase64,
-          },
-        ],
-      });
-    }
+      const dataUrl = `data:image/jpeg;base64,${imageBase64}`;
+
+      // Push additional content into the same message's content array
+      message.content.push({media: {url: dataUrl}});
+    }
     try {
       const response = await this.client.generate({
-        messages,
+        messages: [message],
         ...generateOptions,
       });
 
```
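The message-building change above can be exercised in isolation. The sketch below builds the same single-message shape without calling the model; the local `MessageData` and `Part` types mirror the structure visible in the diff rather than importing Genkit's actual types, and the `image/jpeg` MIME type matches the assumption hard-coded in the commit.

```typescript
// Local stand-ins mirroring the message shape used in the diff above;
// these are illustrative, not imports from Genkit.
type Part = {text?: string; media?: {url: string}};
type MessageData = {role: 'user' | 'model'; content: Part[]};

function buildUserMessage(promptText: string, imageBase64?: string): MessageData {
  const message: MessageData = {
    role: 'user',
    content: [{text: promptText}], // Initialize with the prompt text
  };

  if (imageBase64) {
    // Encode the raw base64 image as a data URL, as in the commit.
    // The image/jpeg MIME type is assumed, matching the diff.
    const dataUrl = `data:image/jpeg;base64,${imageBase64}`;
    message.content.push({media: {url: dataUrl}});
  }

  return message;
}

const msg = buildUserMessage('Describe this image', 'AAAA');
console.log(msg.content.length); // 2
console.log(msg.content[1].media?.url); // data:image/jpeg;base64,AAAA
```

Folding the image into the same user message (rather than pushing a second message, as the old code did) matters because Google AI requires the image and text parts of a prompt to travel together, as noted in the POSTINSTALL changes above.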