This article contains affiliate links. I may earn a commission at no extra cost to you.
title: "Computer Vision for Web Developers: Build an Image Recognition App with TensorFlow.js"
published: true
description: "Learn to add real-time image recognition and object detection to your web apps using TensorFlow.js - no machine learning background required."
tags: computervision, tensorflowjs, ai, javascript, webdev
cover_image: https://dev-to-uploads.s3.amazonaws.com/uploads/articles/placeholder.jpg
Computer vision might sound like rocket science, but thanks to TensorFlow.js, you can add powerful image recognition capabilities to your web applications with surprisingly little code. In this tutorial, we'll build a complete image recognition app that can classify images in real time and detect multiple objects - all running directly in the browser.
Why TensorFlow.js for Computer Vision?
TensorFlow.js brings machine learning models directly to the browser, meaning:
- No server-side processing required
- Real-time performance with webcam input
- User privacy (images never leave their device)
- Works offline once models are loaded
Let's dive in and build something practical.
Setting Up Your Project
First, create a basic HTML structure:
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Computer Vision App</title>
  <!-- TensorFlow.js core is loaded first; the two model bundles below build on it -->
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@4.0.0/dist/tf.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/mobilenet@2.1.0/dist/mobilenet.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/coco-ssd@2.2.2/dist/coco-ssd.min.js"></script>
  <style>
    .container { max-width: 800px; margin: 0 auto; padding: 20px; }
    /* The container is the positioning context for the overlay canvas */
    .video-container { position: relative; display: inline-block; }
    /* The canvas sits exactly on top of the video so boxes line up with the picture */
    canvas { position: absolute; top: 0; left: 0; }
    .predictions { margin-top: 20px; }
    .prediction { padding: 10px; margin: 5px 0; background: #f0f0f0; border-radius: 5px; }
  </style>
</head>
<body>
  <div class="container">
    <h1>Computer Vision Demo</h1>
    <div class="video-container">
      <!-- playsinline keeps the stream inside the page on iOS Safari instead of going fullscreen -->
      <video id="webcam" width="640" height="480" autoplay muted playsinline></video>
      <!-- Same pixel size as the video: detection boxes are drawn here -->
      <canvas id="canvas" width="640" height="480"></canvas>
    </div>
    <!-- Classification results and error messages are rendered into this element -->
    <div class="predictions" id="predictions"></div>
    <button id="toggleCamera">Start Camera</button>
    <input type="file" id="imageUpload" accept="image/*" style="margin-left: 10px;">
  </div>
  <!-- Loaded last so every element above already exists when app.js runs -->
  <script src="app.js"></script>
</body>
</html>
Loading Pre-trained Models
Now let's create our JavaScript application. We'll use two pre-trained models:
- MobileNet: For image classification
- COCO-SSD: For object detection
// app.js
/**
 * Browser app that runs MobileNet classification and COCO-SSD object
 * detection on a webcam stream or an uploaded image.
 *
 * Expects the page to provide elements with the ids `webcam`, `canvas`,
 * `predictions`, `toggleCamera` and `imageUpload`, and the `mobilenet` and
 * `cocoSsd` globals from the TensorFlow.js model bundles.
 */
class ComputerVisionApp {
  /**
   * Looks up the DOM elements the app needs and starts loading the models.
   * `this.ready` settles once initialization has finished (it never rejects).
   */
  constructor() {
    this.video = document.getElementById('webcam');
    this.canvas = document.getElementById('canvas');
    this.ctx = this.canvas.getContext('2d');
    this.predictionsDiv = document.getElementById('predictions');
    this.mobilenetModel = null;
    this.cocoSsdModel = null;
    this.isStreaming = false;

    // A constructor cannot await, so keep the promise instead of letting it
    // float: callers can wait on it and unexpected failures are still logged.
    this.ready = this.init().catch((error) => {
      console.error('Initialization failed:', error);
    });
  }

  /**
   * Loads both models and, on success, wires up the UI event listeners.
   * On failure an error is shown and the listeners are not attached.
   *
   * @returns {Promise<void>}
   */
  async init() {
    console.log('Loading models...');

    try {
      // The two downloads are independent, so run them concurrently.
      const [mobilenetModel, cocoSsdModel] = await Promise.all([
        mobilenet.load(),
        cocoSsd.load()
      ]);
      this.mobilenetModel = mobilenetModel;
      this.cocoSsdModel = cocoSsdModel;
      console.log('Models loaded successfully!');
    } catch (error) {
      console.error('Error loading models:', error);
      this.showError('Failed to load AI models. Please refresh and try again.');
      return;
    }

    this.setupEventListeners();
  }

  /**
   * Connects the camera toggle button and the file input to their handlers.
   */
  setupEventListeners() {
    document.getElementById('toggleCamera').addEventListener('click', () => {
      this.toggleCamera();
    });

    document.getElementById('imageUpload').addEventListener('change', (e) => {
      this.handleImageUpload(e);
    });
  }
}
Implementing Real-time Image Classification
Let's add webcam functionality and real-time classification. Add the following methods inside the `ComputerVisionApp` class body:
async toggleCamera() {
if (!this.isStreaming) {
try {
const stream = await navigator.mediaDevices.getUserMedia({
video: { width: 640, height: 480 }
});
this.video.srcObject = stream;
this.isStreaming = true;
document.getElementById('toggleCamera').textContent = 'Stop Camera';
// Start prediction loop
this.video.addEventListener('loadeddata', () => {
this.predictLoop();
});
} catch (error) {
console.error('Error accessing camera:', error);
this.showError('Camera access denied or not available.');
}
} else {
this.stopCamera();
}
}
stopCamera() {
if (this.video.srcObject) {
this.video.srcObject.getTracks().forEach(track => track.stop());
this.video.srcObject = null;
}
this.isStreaming = false;
document.getElementById('toggleCamera').textContent = 'Start Camera';
}
async predictLoop() {
if (!this.isStreaming) return;
// Clear canvas
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
// Run both classification and object detection
await Promise.all([
this.classifyImage(),
this.detectObjects()
]);
// Continue loop
requestAnimationFrame(() => this.predictLoop());
}
async classifyImage() {
if (!this.mobilenetModel) return;
try {
const predictions = await this.mobilenetModel.classify(this.video);
this.displayClassifications(predictions);
} catch (error) {
console.error('Classification error:', error);
}
}
Adding Object Detection
Now let's implement object detection to identify and highlight multiple items:
async detectObjects() {
if (!this.cocoSsdModel) return;
try {
const predictions = await this.cocoSsdModel.detect(this.video);
this.drawBoundingBoxes(predictions);
} catch (error) {
console.error('Object detection error:', error);
}
}
drawBoundingBoxes(predictions) {
predictions.forEach(prediction => {
const [x, y, width, height] = prediction.bbox;
const confidence = (prediction.score * 100).toFixed(1);
// Draw bounding box
this.ctx.strokeStyle = '#00ff00';
this.ctx.lineWidth = 2;
this.ctx.strokeRect(x, y, width, height);
// Draw label background
const label = `${prediction.class} (${confidence}%)`;
this.ctx.fillStyle = '#00ff00';
this.ctx.fillRect(x, y - 25, this.ctx.measureText(label).width + 10, 25);
// Draw label text
this.ctx.fillStyle = '#000000';
this.ctx.font = '16px Arial';
this.ctx.fillText(label, x + 5, y - 7);
});
}
displayClassifications(predictions) {
const html = predictions
.slice(0, 3) // Show top 3 predictions
.map(pred => {
const confidence = (pred.probability * 100).toFixed(1);
return `
<div class="prediction">
<strong>${pred.className}</strong>: ${confidence}% confidence
</div>
`;
})
.join('');
this.predictionsDiv.innerHTML = html;
}
Handling Image Upload
Let's also support static image analysis:
handleImageUpload(event) {
const file = event.target.files[0];
if (!file) return;
const img = new Image();
img.onload = async () => {
// Draw image to canvas
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
this.ctx.drawImage(img, 0, 0, this.canvas.width, this.canvas.height);
// Run predictions on uploaded image
if (this.mobilenetModel) {
const classifications = await this.mobilenetModel.classify(img);
this.displayClassifications(classifications);
}
if (this.cocoSsdModel) {
const detections = await this.cocoSsdModel.detect(img);
this.drawBoundingBoxes(detections);
}
};
img.src = URL.createObjectURL(file);
}
showError(message) {
this.predictionsDiv.innerHTML = `<div style="color: red; font-weight: bold;">${message}</div>`;
}
Performance Optimization
To keep performance smooth, replace the earlier `predictLoop` method with the throttled version below, and create the app once the page has loaded:
// Add throttling to prediction loop
predictLoop() {
if (!this.isStreaming) return;
// Throttle predictions to ~10 FPS for better performance
setTimeout(async () => {
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
await Promise.all([
this.classifyImage(),
this.detectObjects()
]);
requestAnimationFrame(() => this.predictLoop());
}, 100);
}
// Create the app once the page has finished loading
window.addEventListener('load', function onPageLoad() {
  new ComputerVisionApp();
});
Browser Compatibility and Error Handling
Add robust error handling for different browsers by replacing the earlier `init` method with this version:
async init() {
// Check for required APIs
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
this.showError('Camera API not supported in this browser.');
return;
}
if (!window.tf) {
this.showError('TensorFlow.js failed to load.');
return;
}
// Load models with timeout
const loadTimeout = setTimeout(() => {
this.showError('Model loading timed out. Please check your internet connection.');
}, 30000);
try {
this.mobilenetModel = await mobilenet.load();
this.cocoSsdModel = await cocoSsd.load();
clearTimeout(loadTimeout);
console.log('Models loaded successfully!');
} catch (error) {
clearTimeout(loadTimeout);
console.error('Error loading models:', error);
this.showError('Failed to load AI models. Please refresh and try again.');
return;
}
this.setupEventListeners();
}
Deployment and Framework Integration
For production deployment:
- Serve over HTTPS: Camera access requires a secure context (HTTPS, or `localhost` during development)
- Host models locally: Download model files for faster loading
- Add loading indicators: Models can take time to download
- Implement progressive enhancement: Provide fallbacks for unsupported browsers
For React integration:
// React hook example
import { useEffect, useRef, useState } from 'react';
import * as mobilenet from '@tensorflow-models/mobilenet';
/**
 * React hook that loads MobileNet once on mount and exposes a classifier.
 *
 * @returns {{
 *   classify: (imageElement: *) => Promise<Array>,
 *   isLoading: boolean,
 *   error: (Error|null)
 * }} `classify` resolves to `[]` until the model is available; `isLoading`
 *   turns false once loading has succeeded or failed; `error` holds the load
 *   failure, if any.
 */
const useComputerVision = () => {
  const [model, setModel] = useState(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState(null);

  useEffect(() => {
    // Set by the cleanup function so a load that finishes after unmount
    // does not update state on a component that is gone.
    let isCancelled = false;

    const loadModel = async () => {
      try {
        const loadedModel = await mobilenet.load();
        if (!isCancelled) setModel(loadedModel);
      } catch (loadError) {
        console.error('Failed to load MobileNet:', loadError);
        if (!isCancelled) setError(loadError);
      } finally {
        // Without this a failed load would leave isLoading stuck at true.
        if (!isCancelled) setIsLoading(false);
      }
    };

    loadModel();

    return () => {
      isCancelled = true;
    };
  }, []);

  const classify = async (imageElement) => {
    if (!model) return [];
    return await model.classify(imageElement);
  };

  return { classify, isLoading, error };
};
Conclusion
You now have a complete computer vision application that can:
- Classify images in real-time using your webcam
- Detect and highlight multiple objects
- Process uploaded images
- Handle errors gracefully
- Work across modern browsers
The beauty of TensorFlow.js is that it makes advanced AI accessible to web developers without requiring deep machine learning knowledge. Your users get powerful computer vision features while maintaining privacy, since everything runs locally in their browser.
Next steps you might consider:
- Experiment with different pre-trained models
- Add custom model training for specific use cases
- Integrate with your existing web applications
- Explore other TensorFlow.js models for pose detection, face recognition, or text analysis
The complete code is ready to run - just serve the files over HTTPS (or from `localhost`) and open the page in a modern browser.
Tools mentioned:
Top comments (0)