Knowing what technologies a website uses is valuable for sales prospecting, competitive intelligence, and security research. Tools like BuiltWith charge hundreds per month -- here's how to build your own detector.
What We Can Detect
By analyzing HTTP headers, HTML source, JavaScript files, and cookies, we can identify frameworks, CMS platforms, analytics tools, CDNs, and more.
Technology Stack Detector
pip install requests beautifulsoup4 pandas
import requests
from bs4 import BeautifulSoup
import json
class TechStackDetector:
    """Signature-based website technology detector.

    Fetches a page through the ScraperAPI rendering proxy, then matches
    known fingerprints against the HTML source, response headers, and
    cookies. Each match contributes to a 0-100 confidence score.
    """

    def __init__(self, api_key):
        # ScraperAPI key used by the fetching proxy.
        self.api_key = api_key
        # Fingerprints per technology:
        #   "html"    -> substrings matched case-insensitively in the page source (+40 each)
        #   "headers" -> header name -> required substring of its value; "" means
        #                mere presence of the header is enough (+30 each)
        #   "cookies" -> cookie names that must be present (+30 each)
        self.signatures = {
            "WordPress": {"html": ["wp-content", "wp-includes"]},
            "React": {"html": ["__NEXT_DATA__", "react-root", "_reactRootContainer"]},
            "Vue.js": {"html": ["__VUE__", "v-cloak", "vue-app"]},
            "Angular": {"html": ["ng-version", "ng-app"]},
            "Next.js": {"html": ["__NEXT_DATA__", "_next/static"]},
            "Shopify": {"html": ["cdn.shopify.com", "Shopify.theme"]},
            "Cloudflare": {"headers": {"CF-RAY": "", "Server": "cloudflare"}},
            "Google Analytics": {"html": ["google-analytics.com/analytics.js", "gtag/js"]},
            "Stripe": {"html": ["js.stripe.com"]},
            "Tailwind CSS": {"html": ["tailwindcss"]},
            "Bootstrap": {"html": ["bootstrap.min.css", "bootstrap.min.js"]},
        }

    def detect(self, url):
        """Fetch *url* and return detected technologies, sorted by confidence.

        Each entry is ``{"technology": name, "confidence": 0-100}``.
        Raises ``requests.RequestException`` on network failure.
        """
        # Pass the target URL via `params` so requests percent-encodes it;
        # interpolating it raw would break on URLs containing '&' or '?'.
        resp = requests.get(
            "http://api.scraperapi.com",
            params={"api_key": self.api_key, "url": url, "render": "true"},
            timeout=30,
        )
        # Lowercase header names: HTTP header names are case-insensitive,
        # and a plain dict() of resp.headers would lose that property.
        headers = {name.lower(): value for name, value in resp.headers.items()}
        cookies = {c.name: c.value for c in resp.cookies}
        return self._analyze(resp.text.lower(), headers, cookies)

    def _analyze(self, html, headers, cookies):
        """Score signatures against already-fetched page data.

        Pure function of its inputs (no I/O), so it is unit-testable.
        *html* must be lowercased; *headers* keys must be lowercased.
        """
        detected = []
        for tech, sigs in self.signatures.items():
            confidence = 0
            for pattern in sigs.get("html", []):
                if pattern.lower() in html:
                    confidence += 40
            for header, value in sigs.get("headers", {}).items():
                # Require the expected value substring, not mere presence:
                # a bare "Server" header must not count as Cloudflare.
                # An empty expected value ("") degrades to a presence check.
                actual = headers.get(header.lower())
                if actual is not None and value.lower() in actual.lower():
                    confidence += 30
            for cookie in sigs.get("cookies", []):
                if cookie in cookies:
                    confidence += 30
            if confidence > 0:
                detected.append({"technology": tech, "confidence": min(confidence, 100)})
        return sorted(detected, key=lambda x: x["confidence"], reverse=True)
# Usage: scan one site and print each detected technology with its score.
detector = TechStackDetector("YOUR_SCRAPERAPI_KEY")
stack = detector.detect("https://example.com")
for tech in stack:
    name, score = tech["technology"], tech["confidence"]
    print(f" {name}: {score}% confidence")
Bulk Detection for Prospecting
import pandas as pd
import time
def detect_bulk(domains, api_key, delay=2.0):
    """Detect tech stacks for many domains; return a long-format DataFrame.

    Columns: ``domain``, ``technology``, ``confidence`` — one row per
    detected technology. A domain whose fetch fails is recorded as a
    single ``"Error"`` row so it is not silently dropped.

    Args:
        domains: iterable of bare domain names (no scheme).
        api_key: ScraperAPI key passed to ``TechStackDetector``.
        delay: seconds to sleep between requests (rate limiting);
            kept at 2.0 by default for backward compatibility.
    """
    detector = TechStackDetector(api_key)
    domains = list(domains)
    results = []
    for i, domain in enumerate(domains):
        try:
            for tech in detector.detect(f"https://{domain}"):
                results.append({"domain": domain, **tech})
        except Exception:
            # Best-effort scan: keep going, but mark the failed domain.
            results.append({"domain": domain, "technology": "Error", "confidence": 0})
        # Rate-limit between requests only — no pointless trailing sleep.
        if i < len(domains) - 1:
            time.sleep(delay)
    return pd.DataFrame(results)
# Example: scan candidate domains, then count how many run Shopify.
domains = ["store1.com", "store2.com", "store3.com"]
df = detect_bulk(domains, "YOUR_SCRAPERAPI_KEY")
is_shopify = df["technology"] == "Shopify"
shopify_sites = df[is_shopify]
print(f"Shopify stores found: {len(shopify_sites)}")
Scaling Up
For large-scale detection, ScraperAPI handles rendering and anti-bot challenges. ThorData proxies help when scanning many domains. Monitor accuracy with ScrapeOps.
Conclusion
Building your own tech stack detector gives you the same intelligence as expensive SaaS tools. The signature-based approach is extensible -- just add new patterns as you discover them. Perfect for lead generation, competitive research, and security auditing.
Top comments (0)