Ever wondered what framework a website uses? Or which CDN, analytics tool, or CMS powers a competitor's site? Tools like Wappalyzer and BuiltWith charge for API access, but you can build your own tech stack detector with a simple web scraping API.
In this tutorial, we'll build a CLI tool that analyzes any website's HTML and identifies its technology stack — frameworks, CDNs, analytics, CMS platforms, and more.
What We're Building
A Node.js script that:
- Fetches a website's full HTML (rendered JavaScript included)
- Detects frontend frameworks (React, Vue, Angular, Svelte, Next.js)
- Identifies CSS frameworks (Tailwind, Bootstrap, Bulma)
- Spots analytics & tracking (Google Analytics, Hotjar, Segment, Plausible)
- Finds CDNs (Cloudflare, Fastly, AWS CloudFront)
- Detects CMS platforms (WordPress, Shopify, Webflow, Ghost)
- Identifies hosting/infrastructure clues
Prerequisites
Grab a free API key (200 credits, no card required):
curl -s -X POST https://api.frostbyte.world/api/keys/create | jq
Save the key value — you'll need it.
The Detection Engine
Create detect-stack.mjs:
const API_KEY = process.env.FROSTBYTE_KEY || 'YOUR_KEY';
const API = 'https://api.frostbyte.world';
// Technology signatures ā pattern: [regex or string, tech name, category]
const SIGNATURES = [
// Frontend Frameworks
[/__next/g, 'Next.js', 'Framework'],
[/_nuxt/g, 'Nuxt.js', 'Framework'],
[/react-root|__react/gi, 'React', 'Framework'],
[/ng-version|ng-app/g, 'Angular', 'Framework'],
[/data-v-[a-f0-9]/g, 'Vue.js', 'Framework'],
[/svelte-[a-z]/g, 'Svelte', 'Framework'],
[/gatsby-/g, 'Gatsby', 'Framework'],
[/astro-/gi, 'Astro', 'Framework'],
[/remix-/gi, 'Remix', 'Framework'],
[/__sveltekit/g, 'SvelteKit', 'Framework'],
// CSS Frameworks
[/tailwindcss|tailwind\./gi, 'Tailwind CSS', 'CSS'],
[/bootstrap\.min|bootstrap\//gi, 'Bootstrap', 'CSS'],
[/bulma\.min|bulma\//gi, 'Bulma', 'CSS'],
[/materialize\.min/gi, 'Materialize', 'CSS'],
// Analytics & Tracking
[/gtag|google-analytics|googletagmanager/gi, 'Google Analytics', 'Analytics'],
[/hotjar\.com/gi, 'Hotjar', 'Analytics'],
[/segment\.com\/analytics/gi, 'Segment', 'Analytics'],
[/plausible\.io/gi, 'Plausible', 'Analytics'],
[/mixpanel\.com/gi, 'Mixpanel', 'Analytics'],
[/amplitude\.com/gi, 'Amplitude', 'Analytics'],
[/posthog\.com/gi, 'PostHog', 'Analytics'],
[/clarity\.ms/gi, 'Microsoft Clarity', 'Analytics'],
[/intercom\.com|intercomcdn/gi, 'Intercom', 'Widget'],
[/crisp\.chat/gi, 'Crisp', 'Widget'],
// CMS
[/wp-content|wp-includes|wordpress/gi, 'WordPress', 'CMS'],
[/cdn\.shopify/gi, 'Shopify', 'CMS'],
[/squarespace\.com/gi, 'Squarespace', 'CMS'],
[/webflow\.com|wf-/gi, 'Webflow', 'CMS'],
[/ghost\.io|ghost\.org/gi, 'Ghost', 'CMS'],
[/wix\.com|parastorage/gi, 'Wix', 'CMS'],
[/contentful\.com/gi, 'Contentful', 'CMS'],
// CDN & Infrastructure
[/cloudflare/gi, 'Cloudflare', 'CDN'],
[/fastly/gi, 'Fastly', 'CDN'],
[/cloudfront\.net/gi, 'AWS CloudFront', 'CDN'],
[/akamai/gi, 'Akamai', 'CDN'],
[/vercel/gi, 'Vercel', 'Hosting'],
[/netlify/gi, 'Netlify', 'Hosting'],
[/herokuapp/gi, 'Heroku', 'Hosting'],
[/firebase/gi, 'Firebase', 'Infrastructure'],
// JavaScript Libraries
[/jquery\.min|jquery\//gi, 'jQuery', 'Library'],
[/lodash/gi, 'Lodash', 'Library'],
[/axios/gi, 'Axios', 'Library'],
[/gsap|greensock/gi, 'GSAP', 'Library'],
[/three\.js|threejs/gi, 'Three.js', 'Library'],
[/alpine\.?js|x-data=/gi, 'Alpine.js', 'Library'],
[/htmx/gi, 'HTMX', 'Library'],
// Payment
[/stripe\.com|stripe\.js/gi, 'Stripe', 'Payment'],
[/paypal\.com/gi, 'PayPal', 'Payment'],
// Auth
[/auth0\.com/gi, 'Auth0', 'Auth'],
[/clerk\.com|clerk\.dev/gi, 'Clerk', 'Auth'],
[/supabase/gi, 'Supabase', 'Infrastructure'],
];
/**
 * Fetch a page's HTML through the scraper API.
 *
 * Sends a POST to `${API}/api/scraper/scrape` asking for the `html` format
 * with JavaScript rendering enabled.
 *
 * @param {string} url - Page to scrape.
 * @returns {Promise<string>} The response's `content` field, falling back to
 *   `html`, or an empty string when neither is set.
 * @throws {Error} "Scrape failed: <status>" when the API responds with a
 *   non-2xx status.
 */
async function scrapeHTML(url) {
  const endpoint = `${API}/api/scraper/scrape`;
  const requestHeaders = {
    'X-API-Key': API_KEY,
    'Content-Type': 'application/json',
  };
  // `timeout` is forwarded to the API as-is (presumably milliseconds — confirm
  // against the API docs).
  const payload = { url, format: 'html', javascript: true, timeout: 15000 };

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    throw new Error(`Scrape failed: ${response.status}`);
  }

  const { content, html } = await response.json();
  return content || html || '';
}
/**
 * Scan HTML for known technology fingerprints.
 *
 * @param {string} html - Page markup to scan; null/undefined is treated as
 *   empty.
 * @param {Array<[RegExp|string, string, string]>} [signatures=SIGNATURES] -
 *   Entries of [pattern, tech name, category]. A RegExp is tested against the
 *   markup; a string is matched as a case-sensitive substring.
 * @returns {Map<string, string>} Detected tech name -> category, in signature
 *   order. When several signatures share a name, the first match decides the
 *   category.
 */
function detectTechnologies(html, signatures = SIGNATURES) {
  const text = String(html ?? '');
  const found = new Map();

  for (const [pattern, name, category] of signatures) {
    if (found.has(name)) continue;

    let matched;
    if (typeof pattern === 'string') {
      matched = text.includes(pattern);
    } else {
      // A /g or /y regex keeps lastIndex from its previous use; start fresh so
      // every scan searches from the beginning of the markup.
      pattern.lastIndex = 0;
      matched = pattern.test(text);
    }

    if (matched) found.set(name, category);
  }

  return found;
}
/**
 * Collect the `content` values of every `<meta name="generator">` tag.
 *
 * Attributes are read independently, so the tag is recognised whether `name`
 * comes before or after `content`, and a value may contain the other quote
 * character (e.g. content="Bob's CMS").
 *
 * @param {string} html - Page markup to scan.
 * @returns {string[]} Generator strings in document order; tags with an empty
 *   or missing `content` are skipped.
 */
function detectMetaGenerators(html) {
  const metaTag = /<meta\b[^>]*>/gi;
  // The lookbehind keeps attributes such as `data-name` from matching.
  const generatorName = /(?<![\w-])name\s*=\s*(["'])generator\1/i;
  const contentValue = /(?<![\w-])content\s*=\s*(?:"([^"]*)"|'([^']*)')/i;

  const generators = [];
  for (const [tag] of String(html ?? '').matchAll(metaTag)) {
    if (!generatorName.test(tag)) continue;
    const match = contentValue.exec(tag);
    const value = match?.[1] ?? match?.[2];
    if (value) generators.push(value);
  }
  return generators;
}
/**
 * Scrape a site, detect its technologies, and print them grouped by category.
 *
 * @param {string} url - Site to analyze; `https://` is prepended when the
 *   value has no http(s) scheme.
 * @returns {Promise<void>} Resolves once the report has been printed.
 * @throws {Error} Whatever `scrapeHTML` throws when the page cannot be fetched.
 */
async function analyze(url) {
  // Test for an actual scheme: a bare `startsWith('http')` check would treat
  // hosts such as "httpbin.org" as already being full URLs.
  const target = /^https?:\/\//i.test(url) ? url : `https://${url}`;
  console.log(`\n🔍 Analyzing: ${target}\n`);

  const html = await scrapeHTML(target);
  console.log(`📄 Fetched ${html.length.toLocaleString()} characters of HTML\n`);

  // Detect technologies, then add anything announced via generator meta tags.
  const techs = detectTechnologies(html);
  for (const generator of detectMetaGenerators(html)) {
    techs.set(generator, 'Generator');
  }

  if (techs.size === 0) {
    console.log('No technologies detected.');
    return;
  }

  // Group by category, keeping first-seen order.
  const grouped = {};
  for (const [name, category] of techs) {
    if (!grouped[category]) grouped[category] = [];
    grouped[category].push(name);
  }

  // Display results
  const icons = {
    Framework: '⚛️',
    CSS: '🎨',
    Analytics: '📊',
    Widget: '💬',
    CMS: '📝',
    CDN: '🌐',
    Hosting: '☁️',
    Infrastructure: '🏗️',
    Library: '📦',
    Payment: '💳',
    Auth: '🔐',
    Generator: '⚙️',
  };
  for (const [category, items] of Object.entries(grouped)) {
    const icon = icons[category] || '🔧';
    console.log(`${icon} ${category}`);
    for (const item of items) {
      console.log(`  └─ ${item}`);
    }
    console.log();
  }

  console.log(`Total: ${techs.size} technologies detected`);
}
// Run: CLI entry point. Expects the site to analyze as the first argument.
const url = process.argv[2];
if (!url) {
  console.log('Usage: node detect-stack.mjs <url>');
  console.log('Example: node detect-stack.mjs stripe.com');
  process.exit(1);
}
analyze(url).catch((e) => {
  console.error('Error:', e.message);
  // Report the failure to the calling shell; without this a failed scan
  // would still exit with status 0.
  process.exitCode = 1;
});
Try It Out
export FROSTBYTE_KEY="your-key-here"
node detect-stack.mjs vercel.com
Output:
🔍 Analyzing: https://vercel.com
📄 Fetched 184,293 characters of HTML
⚛️ Framework
  └─ Next.js
  └─ React
🎨 CSS
  └─ Tailwind CSS
📊 Analytics
  └─ Google Analytics
  └─ Segment
☁️ Hosting
  └─ Vercel
📦 Library
  └─ GSAP
Total: 7 technologies detected
Scan Multiple Sites at Once
Want to compare competitors? Add batch scanning:
/**
 * Scan several sites one after another and print a comparison.
 *
 * A failure on one site is recorded and logged, and does not stop the rest of
 * the scan.
 *
 * @param {string[]} urls - Sites to scan; `https://` is prepended to entries
 *   without an http(s) scheme.
 * @returns {Promise<Object<string, string[]>>} Map of each input URL to its
 *   "name (category)" strings, or to a single "Error: ..." entry on failure.
 */
async function batchScan(urls) {
  console.log(`Scanning ${urls.length} sites...\n`);
  const results = {};

  for (const url of urls) {
    // Test for an actual scheme: a bare `startsWith('http')` check would treat
    // hosts such as "httpbin.org" as already being full URLs.
    const target = /^https?:\/\//i.test(url) ? url : `https://${url}`;
    try {
      const html = await scrapeHTML(target);
      const techs = detectTechnologies(html);
      results[url] = [...techs.entries()].map(([name, cat]) => `${name} (${cat})`);
      console.log(`✅ ${url}: ${techs.size} technologies`);
    } catch (e) {
      results[url] = [`Error: ${e.message}`];
      console.log(`❌ ${url}: ${e.message}`);
    }
  }

  // Comparison table
  console.log('\n--- Comparison ---\n');
  for (const [url, techs] of Object.entries(results)) {
    console.log(`${url}:`);
    for (const tech of techs) {
      console.log(`  • ${tech}`);
    }
    console.log();
  }

  return results;
}
// Usage:
// batchScan(['stripe.com', 'shopify.com', 'vercel.com']);
Extend It: Add Header Analysis
HTTP headers reveal even more about a site's infrastructure. Enhance the detector by also checking response headers:
/**
 * Infer server-side technologies from a site's HTTP response headers.
 *
 * @param {string} url - Absolute URL to request (redirects are followed).
 * @returns {Promise<string[]>} Labels for every header fingerprint that
 *   matched, in a fixed order.
 * @throws {TypeError} When `fetch` cannot complete the request.
 */
async function detectFromHeaders(url) {
  const res = await fetch(url, { redirect: 'follow' });
  // Only the headers are needed, so discard the body instead of leaving it
  // unread.
  await res.body?.cancel();

  // Header values vary in casing between servers ("nginx", "Apache",
  // "express"), so compare case-insensitively. `needle` must be lowercase.
  // Using headers.get() also sees every Set-Cookie value, not just one.
  const valueContains = (header, needle) =>
    res.headers.get(header)?.toLowerCase().includes(needle) ?? false;
  const isPresent = (header) => Boolean(res.headers.get(header));

  const checks = [
    [valueContains('server', 'nginx'), 'Nginx'],
    [valueContains('server', 'apache'), 'Apache'],
    [valueContains('x-powered-by', 'express'), 'Express.js'],
    [valueContains('x-powered-by', 'php'), 'PHP'],
    [isPresent('cf-ray'), 'Cloudflare'],
    [isPresent('x-vercel-id'), 'Vercel'],
    [isPresent('x-amz-cf-id'), 'AWS CloudFront'],
    [valueContains('x-served-by', 'cache-'), 'Fastly'],
    [isPresent('x-cache'), 'CDN Cache'],
    [valueContains('set-cookie', '__stripe'), 'Stripe'],
  ];

  return checks.filter(([matched]) => matched).map(([, label]) => label);
}
How It Works
Scrape with JavaScript rendering — Many modern sites are SPAs that render client-side. The scraper API renders JavaScript, so we see the actual DOM, not just the initial HTML shell.
Pattern matching — We scan the HTML for technology-specific fingerprints: file paths (`/_next/`, `/wp-content/`), DOM attributes (`data-v-`, `ng-version`), script sources, and inline references.
Meta generator tags — Many CMS platforms and static site generators embed `<meta name="generator">` tags that directly identify the platform.
Header inspection — Server headers like `x-powered-by`, `server`, and CDN-specific headers (`cf-ray`, `x-vercel-id`) reveal backend infrastructure.
Use Cases
- Competitive analysis — See what tech your competitors use
- Lead generation — Find companies using a specific framework (great for dev tool sales)
- Security auditing — Identify outdated frameworks or known-vulnerable libraries
- Market research — Track technology adoption trends across industries
- Sales intelligence — Know a prospect's stack before pitching your dev tool
API Cost
Each scan uses 1 credit (scraper API call). With 200 free credits, you can analyze 200 websites before needing to top up.
Get your free API key: api.frostbyte.world — 200 free credits, 8 APIs (scraping, screenshots, IP geolocation, DNS, crypto prices, code execution, and more).
Top comments (0)