Paul Robertson — Learn to build a complete image recognition web app using TensorFlow.js with real-time webcam classification and object detection. Includes practical code examples, performance optimization, and deployment tips for web developers.
This article contains affiliate links. I may earn a commission at no extra cost to you.
title: "Computer Vision for Web Developers: Build an Image Recognition App with TensorFlow.js"
published: true
description: "Learn to add real-time image recognition and object detection to your web apps using TensorFlow.js - no machine learning background required."
tags: computervision, tensorflowjs, ai, javascript, webdev
Computer vision might sound like rocket science, but thanks to TensorFlow.js, you can add powerful image recognition capabilities to your web applications with surprisingly little code. In this tutorial, we'll build a complete image recognition app that can classify images in real-time and detect multiple objects - all running directly in the browser.
TensorFlow.js brings machine learning models directly to the browser, meaning predictions run locally on the user's device and images never have to leave the browser.
Let's dive in and build something practical.
First, create a basic HTML structure:
<!DOCTYPE html>
<html>
<head>
<title>Computer Vision App</title>
<!-- TensorFlow.js 4.0.0 plus the MobileNet 2.1.0 and COCO-SSD 2.2.2 model packages, all loaded from the jsDelivr CDN. -->
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@4.0.0/dist/tf.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/mobilenet@2.1.0/dist/mobilenet.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/coco-ssd@2.2.2/dist/coco-ssd.min.js"></script>
<style>
.container { max-width: 800px; margin: 0 auto; padding: 20px; }
/* The relatively positioned wrapper is the anchor for the absolutely positioned canvas below. */
.video-container { position: relative; display: inline-block; }
/* Pins the canvas to the wrapper's top-left corner so it sits on top of the video. */
canvas { position: absolute; top: 0; left: 0; }
.predictions { margin-top: 20px; }
.prediction { padding: 10px; margin: 5px 0; background: #f0f0f0; border-radius: 5px; }
</style>
</head>
<body>
<div class="container">
<h1>Computer Vision Demo</h1>
<div class="video-container">
<!-- The video and canvas share the same 640x480 size, so canvas coordinates line up with video pixels. -->
<video id="webcam" width="640" height="480" autoplay muted></video>
<canvas id="canvas" width="640" height="480"></canvas>
</div>
<!-- app.js writes classification results and error messages into this element. -->
<div class="predictions" id="predictions"></div>
<button id="toggleCamera">Start Camera</button>
<input type="file" id="imageUpload" accept="image/*" style="margin-left: 10px;">
</div>
<!-- Loaded at the end of <body>, after the elements above have been parsed. -->
<script src="app.js"></script>
</body>
</html>
Now let's create our JavaScript application. We'll use two pre-trained models: MobileNet for image classification and COCO-SSD for object detection.
// app.js
/**
 * Browser app that connects the page's webcam <video>, overlay <canvas> and
 * predictions panel to two pre-trained models: MobileNet and COCO-SSD.
 *
 * Constructing an instance looks up the DOM elements and immediately starts
 * loading both models. The UI event listeners are attached only after both
 * models have loaded.
 *
 * `showError`, `toggleCamera` and `handleImageUpload` are called from here but
 * are not defined in this class body; they must be provided as methods.
 */
class ComputerVisionApp {
  constructor() {
    this.video = document.getElementById('webcam');
    this.canvas = document.getElementById('canvas');
    this.ctx = this.canvas.getContext('2d');
    this.predictionsDiv = document.getElementById('predictions');
    this.mobilenetModel = null;
    this.cocoSsdModel = null;
    this.isStreaming = false;

    // A constructor cannot await, so attach a rejection handler instead of
    // leaving the promise floating (which would surface as an unhandled
    // rejection if anything outside init()'s own try/catch throws).
    this.init().catch((error) => {
      console.error('Initialization failed:', error);
    });
  }

  /**
   * Loads both models and, on success, attaches the UI event listeners.
   * On failure the error is logged and reported through `showError`, and no
   * listeners are attached.
   *
   * @returns {Promise<void>}
   */
  async init() {
    console.log('Loading models...');
    try {
      // The two downloads are independent, so start them together rather
      // than waiting for MobileNet to finish before requesting COCO-SSD.
      const [mobilenetModel, cocoSsdModel] = await Promise.all([
        mobilenet.load(),
        cocoSsd.load(),
      ]);
      this.mobilenetModel = mobilenetModel;
      this.cocoSsdModel = cocoSsdModel;
      console.log('Models loaded successfully!');
    } catch (error) {
      console.error('Error loading models:', error);
      this.showError('Failed to load AI models. Please refresh and try again.');
      return;
    }
    this.setupEventListeners();
  }

  /**
   * Wires the camera toggle button and the image upload input to their
   * handlers.
   */
  setupEventListeners() {
    document.getElementById('toggleCamera').addEventListener('click', () => {
      this.toggleCamera();
    });
    document.getElementById('imageUpload').addEventListener('change', (e) => {
      this.handleImageUpload(e);
    });
  }
}
Let's add webcam functionality and real-time classification. Add the following methods inside the `ComputerVisionApp` class:
async toggleCamera() {
if (!this.isStreaming) {
try {
const stream = await navigator.mediaDevices.getUserMedia({
video: { width: 640, height: 480 }
});
this.video.srcObject = stream;
this.isStreaming = true;
document.getElementById('toggleCamera').textContent = 'Stop Camera';
// Start prediction loop
this.video.addEventListener('loadeddata', () => {
this.predictLoop();
});
} catch (error) {
console.error('Error accessing camera:', error);
this.showError('Camera access denied or not available.');
}
} else {
this.stopCamera();
}
}
stopCamera() {
if (this.video.srcObject) {
this.video.srcObject.getTracks().forEach(track => track.stop());
this.video.srcObject = null;
}
this.isStreaming = false;
document.getElementById('toggleCamera').textContent = 'Start Camera';
}
async predictLoop() {
if (!this.isStreaming) return;
// Clear canvas
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
// Run both classification and object detection
await Promise.all([
this.classifyImage(),
this.detectObjects()
]);
// Continue loop
requestAnimationFrame(() => this.predictLoop());
}
async classifyImage() {
if (!this.mobilenetModel) return;
try {
const predictions = await this.mobilenetModel.classify(this.video);
this.displayClassifications(predictions);
} catch (error) {
console.error('Classification error:', error);
}
}
Now let's implement object detection to identify and highlight multiple items:
async detectObjects() {
if (!this.cocoSsdModel) return;
try {
const predictions = await this.cocoSsdModel.detect(this.video);
this.drawBoundingBoxes(predictions);
} catch (error) {
console.error('Object detection error:', error);
}
}
drawBoundingBoxes(predictions) {
predictions.forEach(prediction => {
const [x, y, width, height] = prediction.bbox;
const confidence = (prediction.score * 100).toFixed(1);
// Draw bounding box
this.ctx.strokeStyle = '#00ff00';
this.ctx.lineWidth = 2;
this.ctx.strokeRect(x, y, width, height);
// Draw label background
const label = `${prediction.class} (${confidence}%)`;
this.ctx.fillStyle = '#00ff00';
this.ctx.fillRect(x, y - 25, this.ctx.measureText(label).width + 10, 25);
// Draw label text
this.ctx.fillStyle = '#000000';
this.ctx.font = '16px Arial';
this.ctx.fillText(label, x + 5, y - 7);
});
}
displayClassifications(predictions) {
const html = predictions
.slice(0, 3) // Show top 3 predictions
.map(pred => {
const confidence = (pred.probability * 100).toFixed(1);
return `
<div class="prediction">
<strong>${pred.className}</strong>: ${confidence}% confidence
</div>
`;
})
.join('');
this.predictionsDiv.innerHTML = html;
}
Let's also support static image analysis:
handleImageUpload(event) {
const file = event.target.files[0];
if (!file) return;
const img = new Image();
img.onload = async () => {
// Draw image to canvas
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
this.ctx.drawImage(img, 0, 0, this.canvas.width, this.canvas.height);
// Run predictions on uploaded image
if (this.mobilenetModel) {
const classifications = await this.mobilenetModel.classify(img);
this.displayClassifications(classifications);
}
if (this.cocoSsdModel) {
const detections = await this.cocoSsdModel.detect(img);
this.drawBoundingBoxes(detections);
}
};
img.src = URL.createObjectURL(file);
}
showError(message) {
this.predictionsDiv.innerHTML = `<div style="color: red; font-weight: bold;">${message}</div>`;
}
To ensure smooth performance, implement these optimizations. The throttled `predictLoop` below replaces the earlier version:
// Add throttling to prediction loop
predictLoop() {
if (!this.isStreaming) return;
// Throttle predictions to ~10 FPS for better performance
setTimeout(async () => {
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
await Promise.all([
this.classifyImage(),
this.detectObjects()
]);
requestAnimationFrame(() => this.predictLoop());
}, 100);
}
// Create the app once the page has finished loading; constructing it starts
// the model downloads.
function startComputerVisionApp() {
  new ComputerVisionApp();
}
window.addEventListener('load', () => startComputerVisionApp());
Add robust error handling for different browsers by replacing `init()` with this version:
async init() {
// Check for required APIs
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
this.showError('Camera API not supported in this browser.');
return;
}
if (!window.tf) {
this.showError('TensorFlow.js failed to load.');
return;
}
// Load models with timeout
const loadTimeout = setTimeout(() => {
this.showError('Model loading timed out. Please check your internet connection.');
}, 30000);
try {
this.mobilenetModel = await mobilenet.load();
this.cocoSsdModel = await cocoSsd.load();
clearTimeout(loadTimeout);
console.log('Models loaded successfully!');
} catch (error) {
clearTimeout(loadTimeout);
console.error('Error loading models:', error);
this.showError('Failed to load AI models. Please refresh and try again.');
return;
}
this.setupEventListeners();
}
For production deployment:
For React integration:
// React hook example
import { useEffect, useRef, useState } from 'react';
import * as mobilenet from '@tensorflow-models/mobilenet';
/**
 * React hook that loads MobileNet once on mount and exposes a classifier.
 *
 * @returns {{
 *   classify: (imageElement: *) => Promise<Array>,
 *   isLoading: boolean,
 *   error: (Error|null)
 * }} `classify` resolves to `[]` until the model is available; `isLoading`
 *   turns false when loading finishes, whether it succeeded or failed;
 *   `error` holds the load failure, if any.
 */
const useComputerVision = () => {
  const [model, setModel] = useState(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState(null);

  useEffect(() => {
    // Guards against setting state after the component has unmounted.
    let isActive = true;

    const loadModel = async () => {
      try {
        const loadedModel = await mobilenet.load();
        if (isActive) setModel(loadedModel);
      } catch (loadError) {
        // Without this catch a failed download would be an unhandled
        // rejection and isLoading would stay true forever.
        console.error('Failed to load MobileNet:', loadError);
        if (isActive) setError(loadError);
      } finally {
        if (isActive) setIsLoading(false);
      }
    };
    loadModel();

    return () => {
      isActive = false;
    };
  }, []);

  const classify = async (imageElement) => {
    if (!model) return [];
    return await model.classify(imageElement);
  };

  return { classify, isLoading, error };
};
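You now have a complete computer vision application that can classify webcam frames in real time, detect and outline multiple objects, and analyze uploaded images.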
The beauty of TensorFlow.js is that it makes advanced AI accessible to web developers without requiring deep machine learning knowledge. Your users get powerful computer vision features while maintaining privacy, since everything runs locally in their browser.
Next steps you might consider:
The complete code is ready to run - jus
Tools mentioned: