Skip to content

Commit

Permalink
Gemini bbox visualization tool
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw authored Aug 26, 2024
1 parent 1390c40 commit 33199b2
Showing 1 changed file with 142 additions and 0 deletions.
142 changes: 142 additions & 0 deletions gemini-bbox-tool.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gemini API Image Processor with Bounding Box Visualization</title>
<script type="module">
import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai";
import { marked } from "https://esm.run/marked";

function getApiKey() {
let apiKey = localStorage.getItem("GEMINI_API_KEY");
if (!apiKey) {
apiKey = prompt("Please enter your Gemini API key:");
if (apiKey) {
localStorage.setItem("GEMINI_API_KEY", apiKey);
}
}
return apiKey;
}

async function getGenerativeModel(params) {
const API_KEY = getApiKey();
const genAI = new GoogleGenerativeAI(API_KEY);
return genAI.getGenerativeModel(params);
}

async function fileToGenerativePart(file) {
return new Promise((resolve) => {
const reader = new FileReader();
reader.onloadend = () => resolve({
inlineData: {
data: reader.result.split(",")[1],
mimeType: file.type
}
});
reader.readAsDataURL(file);
});
}

async function processImageAndPrompt() {
const fileInput = document.getElementById('imageInput');
const promptInput = document.getElementById('promptInput');
const resultDiv = document.getElementById('result');

if (!fileInput.files[0] || !promptInput.value) {
alert('Please select an image and enter a prompt.');
return;
}

resultDiv.innerHTML = 'Processing...';

try {
const model = await getGenerativeModel({ model: "gemini-1.5-pro" });
const imagePart = await fileToGenerativePart(fileInput.files[0]);

const result = await model.generateContent([promptInput.value, imagePart]);
const response = await result.response;
const text = response.text();

resultDiv.innerHTML = marked.parse(text);

// Extract coordinates from the response
const coordinates = extractCoordinates(text);
if (coordinates.length > 0) {
displayImageWithBoundingBoxes(fileInput.files[0], coordinates);
}
} catch (error) {
resultDiv.innerHTML = `Error: ${error.message}`;
}
}

function extractCoordinates(text) {
const regex = /\[\s*\d+\s*,\s*\d+\s*,\s*\d+\s*,\s*\d+\s*\]/g;
const matches = text.match(regex) || [];
return matches.map(JSON.parse);
}

function displayImageWithBoundingBoxes(file, coordinates) {
const reader = new FileReader();
reader.onload = function(event) {
const image = new Image();
image.onload = function() {
const canvas = document.getElementById('canvas');
canvas.width = image.width;
canvas.height = image.height;
const ctx = canvas.getContext('2d');
ctx.drawImage(image, 0, 0);

const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
coordinates.forEach((box, index) => {
const [ymin, xmin, ymax, xmax] = box.map(coord => (1000 - coord) / 1000);
const width = (xmax - xmin) * image.width;
const height = (ymax - ymin) * image.height;

ctx.strokeStyle = colors[index % colors.length];
ctx.lineWidth = 2;
ctx.strokeRect(xmin * image.width, ymin * image.height, width, height);
});
};
image.src = event.target.result;
};
reader.readAsDataURL(file);
}

// Attach event listener to the submit button
document.getElementById('submitBtn').addEventListener('click', processImageAndPrompt);
</script>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
textarea {
width: 100%;
height: 100px;
}
#result, #imageContainer {
margin-top: 20px;
border: 1px solid #ccc;
padding: 10px;
}
#canvas {
max-width: 100%;
height: auto;
}
</style>
</head>
<body>
<h1>Gemini API Image Processor with Bounding Box Visualization</h1>
<input type="file" id="imageInput" accept="image/*">
<textarea id="promptInput">Return bounding boxes as [ymin, xmin, ymax, xmax]
</textarea>
<button id="submitBtn">Process</button>
<div id="result"></div>
<div id="imageContainer">
<canvas id="canvas"></canvas>
</div>
</body>
</html>

0 comments on commit 33199b2

Please sign in to comment.