-
-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prompts: https://gist.github.com/simonw/c9b55757c8959b2c84d0d9bddd020f4d Which used https://gist.github.com/simonw/40ff639e96d55a1df7ebfa7db1974b92 See also: simonw/llm#557 (comment)
- Loading branch information
Showing
1 changed file
with
142 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<title>Gemini API Image Processor with Bounding Box Visualization</title> | ||
<script type="module"> | ||
import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai"; | ||
import { marked } from "https://esm.run/marked"; | ||
|
||
function getApiKey() { | ||
let apiKey = localStorage.getItem("GEMINI_API_KEY"); | ||
if (!apiKey) { | ||
apiKey = prompt("Please enter your Gemini API key:"); | ||
if (apiKey) { | ||
localStorage.setItem("GEMINI_API_KEY", apiKey); | ||
} | ||
} | ||
return apiKey; | ||
} | ||
|
||
async function getGenerativeModel(params) { | ||
const API_KEY = getApiKey(); | ||
const genAI = new GoogleGenerativeAI(API_KEY); | ||
return genAI.getGenerativeModel(params); | ||
} | ||
|
||
async function fileToGenerativePart(file) { | ||
return new Promise((resolve) => { | ||
const reader = new FileReader(); | ||
reader.onloadend = () => resolve({ | ||
inlineData: { | ||
data: reader.result.split(",")[1], | ||
mimeType: file.type | ||
} | ||
}); | ||
reader.readAsDataURL(file); | ||
}); | ||
} | ||
|
||
async function processImageAndPrompt() { | ||
const fileInput = document.getElementById('imageInput'); | ||
const promptInput = document.getElementById('promptInput'); | ||
const resultDiv = document.getElementById('result'); | ||
|
||
if (!fileInput.files[0] || !promptInput.value) { | ||
alert('Please select an image and enter a prompt.'); | ||
return; | ||
} | ||
|
||
resultDiv.innerHTML = 'Processing...'; | ||
|
||
try { | ||
const model = await getGenerativeModel({ model: "gemini-1.5-pro" }); | ||
const imagePart = await fileToGenerativePart(fileInput.files[0]); | ||
|
||
const result = await model.generateContent([promptInput.value, imagePart]); | ||
const response = await result.response; | ||
const text = response.text(); | ||
|
||
resultDiv.innerHTML = marked.parse(text); | ||
|
||
// Extract coordinates from the response | ||
const coordinates = extractCoordinates(text); | ||
if (coordinates.length > 0) { | ||
displayImageWithBoundingBoxes(fileInput.files[0], coordinates); | ||
} | ||
} catch (error) { | ||
resultDiv.innerHTML = `Error: ${error.message}`; | ||
} | ||
} | ||
|
||
function extractCoordinates(text) { | ||
const regex = /\[\s*\d+\s*,\s*\d+\s*,\s*\d+\s*,\s*\d+\s*\]/g; | ||
const matches = text.match(regex) || []; | ||
return matches.map(JSON.parse); | ||
} | ||
|
||
function displayImageWithBoundingBoxes(file, coordinates) { | ||
const reader = new FileReader(); | ||
reader.onload = function(event) { | ||
const image = new Image(); | ||
image.onload = function() { | ||
const canvas = document.getElementById('canvas'); | ||
canvas.width = image.width; | ||
canvas.height = image.height; | ||
const ctx = canvas.getContext('2d'); | ||
ctx.drawImage(image, 0, 0); | ||
|
||
const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF']; | ||
coordinates.forEach((box, index) => { | ||
const [ymin, xmin, ymax, xmax] = box.map(coord => (1000 - coord) / 1000); | ||
const width = (xmax - xmin) * image.width; | ||
const height = (ymax - ymin) * image.height; | ||
|
||
ctx.strokeStyle = colors[index % colors.length]; | ||
ctx.lineWidth = 2; | ||
ctx.strokeRect(xmin * image.width, ymin * image.height, width, height); | ||
}); | ||
}; | ||
image.src = event.target.result; | ||
}; | ||
reader.readAsDataURL(file); | ||
} | ||
|
||
// Attach event listener to the submit button | ||
document.getElementById('submitBtn').addEventListener('click', processImageAndPrompt); | ||
</script> | ||
<style> | ||
body { | ||
font-family: Arial, sans-serif; | ||
max-width: 800px; | ||
margin: 0 auto; | ||
padding: 20px; | ||
} | ||
textarea { | ||
width: 100%; | ||
height: 100px; | ||
} | ||
#result, #imageContainer { | ||
margin-top: 20px; | ||
border: 1px solid #ccc; | ||
padding: 10px; | ||
} | ||
#canvas { | ||
max-width: 100%; | ||
height: auto; | ||
} | ||
</style> | ||
</head> | ||
<body> | ||
<h1>Gemini API Image Processor with Bounding Box Visualization</h1> | ||
<input type="file" id="imageInput" accept="image/*"> | ||
<textarea id="promptInput">Return bounding boxes as [ymin, xmin, ymax, xmax] | ||
</textarea> | ||
<button id="submitBtn">Process</button> | ||
<div id="result"></div> | ||
<div id="imageContainer"> | ||
<canvas id="canvas"></canvas> | ||
</div> | ||
</body> | ||
</html> |