Building a Competitive Intelligence Dashboard with Web Scraping
Understanding competitors' pricing changes, hiring patterns, and marketing strategies gives you a strategic advantage. Here's how to build a Python-powered competitive intelligence system.
Core Engine
import hashlib
import json
import os
import re
import sqlite3
import time
from datetime import datetime

import requests
from bs4 import BeautifulSoup
class CompetitiveIntel:
    """Scrapes and stores competitive intelligence (pricing, hiring) in SQLite.

    Args:
        db_path: Path of the SQLite database file (created on first use).
        api_key: Optional ScraperAPI key; when set, pages are fetched through
            the rendering proxy instead of directly.
    """

    def __init__(self, db_path='intel.db', api_key=None):
        self.db = sqlite3.connect(db_path)
        self.api_key = api_key
        self.session = requests.Session()
        # A browser-like UA reduces trivial bot blocking on target sites.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'})
        self._init_db()

    def _init_db(self):
        """Create the schema if it does not exist (idempotent)."""
        self.db.executescript('''
            CREATE TABLE IF NOT EXISTS competitors (
                id TEXT PRIMARY KEY, name TEXT, domain TEXT, category TEXT);
            CREATE TABLE IF NOT EXISTS intel (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                competitor_id TEXT, dimension TEXT,
                title TEXT, content TEXT, url TEXT,
                content_hash TEXT, detected_at DATETIME,
                UNIQUE(competitor_id, content_hash));
            CREATE TABLE IF NOT EXISTS pricing (
                competitor_id TEXT, plan TEXT,
                price REAL, features TEXT, captured_at DATETIME);
        ''')

    def _fetch(self, url):
        """GET `url`, via the ScraperAPI proxy when an API key is configured.

        Returns the `requests.Response`; callers read `.text`.
        """
        if self.api_key:
            # Let requests build the query string: interpolating the raw
            # target URL (as the original did) breaks whenever it contains
            # '&' or '?', and leaks the key over plain http.
            return self.session.get(
                'https://api.scraperapi.com/',
                params={'api_key': self.api_key, 'url': url, 'render': 'true'},
                timeout=60)
        # Always bound the request so a hung host can't stall the pipeline.
        return self.session.get(url, timeout=30)

    def add_competitor(self, cid, name, domain, cat='direct'):
        """Register (or overwrite) a competitor row keyed by `cid`."""
        self.db.execute('INSERT OR REPLACE INTO competitors VALUES (?,?,?,?)',
                        (cid, name, domain, cat))
        self.db.commit()
Pricing Intelligence
def track_pricing(self, cid, url, plan_sel, price_sel):
    """Scrape a pricing page and persist one row per detected plan.

    Args:
        cid: Competitor id (foreign key into `competitors`).
        url: Pricing page URL.
        plan_sel: CSS selector matching one element per plan card.
        price_sel: CSS selector for the price element within a card.

    Returns:
        List of dicts: {'name', 'price', 'features'} per plan. Price is 0
        when no '$<number>' pattern was found in the price element.
    """
    resp = self._fetch(url)
    soup = BeautifulSoup(resp.text, 'html.parser')
    plans = []
    captured = datetime.now().isoformat()  # one timestamp per snapshot
    for el in soup.select(plan_sel):
        name = el.select_one('h2, h3, .plan-name')
        price_el = el.select_one(price_sel)
        if not name or not price_el:
            continue
        match = re.search(r'\$(\d+\.?\d*)', price_el.get_text())
        price = float(match.group(1)) if match else 0
        features = [li.get_text(strip=True) for li in el.select('li')]
        plan = {'name': name.get_text(strip=True), 'price': price,
                'features': features}
        plans.append(plan)
        # Persist EVERY plan. The original inserted once after the loop using
        # plans[-1], which stored only the last plan and raised IndexError
        # when the selectors matched nothing.
        self.db.execute('INSERT INTO pricing VALUES (?,?,?,?,?)',
                        (cid, plan['name'], price, json.dumps(features),
                         captured))
    self.db.commit()
    return plans
def pricing_changes(self, cid):
    """Diff consecutive price captures for competitor `cid`.

    Walks captures newest-first; each time a plan's price differs from the
    next-newer capture of the same plan, records a change entry.

    Returns:
        List of dicts {'plan', 'old', 'new', 'pct'} ordered newest change
        first. `pct` is the percentage move from old to new (positive = up).
    """
    rows = self.db.execute(
        'SELECT plan, price, captured_at FROM pricing WHERE competitor_id=? ORDER BY captured_at DESC',
        (cid,)).fetchall()
    changes, seen = [], {}
    for plan, price, date in rows:
        # `seen[plan]` holds the next-newer price; `price` is the older one.
        if plan in seen and seen[plan] != price:
            # track_pricing stores 0 when a price fails to parse; guard the
            # division instead of crashing with ZeroDivisionError.
            pct = round((seen[plan] - price) / price * 100, 1) if price else 0.0
            changes.append({'plan': plan, 'old': price, 'new': seen[plan],
                            'pct': pct})
        seen[plan] = price
    return changes
Job Posting Intelligence
def track_hiring(self, cid, url, job_sel):
    """Scrape a careers page and record new job postings as 'hiring' intel.

    Args:
        cid: Competitor id.
        url: Careers/jobs page URL.
        job_sel: CSS selector matching one element per posting.

    Returns:
        List of all postings scraped this run (including ones already
        recorded), as dicts {'title', 'dept', 'location'}.
    """
    resp = self._fetch(url)
    soup = BeautifulSoup(resp.text, 'html.parser')
    jobs = []
    now = datetime.now().isoformat()
    for el in soup.select(job_sel):
        title = el.select_one('h2, h3, a, .job-title')
        dept = el.select_one('.department, .team')
        loc = el.select_one('.location')
        if not title:
            continue
        job = {'title': title.get_text(strip=True),
               'dept': dept.get_text(strip=True) if dept else 'Unknown',
               'location': loc.get_text(strip=True) if loc else 'Unknown'}
        jobs.append(job)
        # Hash the full posting, not just the title: the original collapsed
        # two openings that share a title but differ in dept/location.
        # sort_keys keeps the hash stable across dict orderings.
        h = hashlib.sha256(
            json.dumps(job, sort_keys=True).encode()).hexdigest()
        try:
            self.db.execute('INSERT INTO intel VALUES (NULL,?,?,?,?,?,?,?)',
                            (cid, 'hiring', job['title'], json.dumps(job),
                             '', h, now))
        except sqlite3.IntegrityError:
            pass  # (competitor_id, content_hash) already recorded — expected
    self.db.commit()
    return jobs
def hiring_trends(self, cid):
    """Count hiring intel from the last 30 days, grouped by department.

    Returns:
        Dict mapping department name -> posting count, ordered from the
        most- to the least-active department.
    """
    rows = self.db.execute('''
        SELECT content FROM intel WHERE competitor_id=? AND dimension='hiring'
        AND detected_at > datetime('now','-30 days')
        ''', (cid,)).fetchall()
    tally = {}
    for (payload,) in rows:
        team = json.loads(payload).get('dept', 'Unknown')
        tally[team] = tally.get(team, 0) + 1
    # Busiest departments first; insertion order is the ranking.
    ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)
Reports
class IntelReport:
    """Builds and prints competitive-intelligence summaries.

    Wraps a CompetitiveIntel engine and renders its stored data; all
    computation is delegated back to the engine in `generate()`.
    """

    def __init__(self, engine):
        self.e = engine

    def generate(self, cid):
        """Assemble the report dict for competitor `cid`.

        Returns:
            {'competitor', 'domain', 'pricing_changes', 'hiring_trends'}
            or None when `cid` is not a registered competitor.
        """
        row = self.e.db.execute(
            'SELECT name,domain FROM competitors WHERE id=?', (cid,)).fetchone()
        if row is None:
            return None
        name, domain = row
        return {
            'competitor': name,
            'domain': domain,
            'pricing_changes': self.e.pricing_changes(cid),
            'hiring_trends': self.e.hiring_trends(cid),
        }

    def display(self, r):
        """Pretty-print a report produced by `generate()` to stdout."""
        print(f"\nIntelligence: {r['competitor']}")
        print("=" * 50)
        pricing = r['pricing_changes']
        if pricing:
            print("\nPricing Changes:")
            for change in pricing:
                direction = 'UP' if change['pct'] > 0 else 'DOWN'
                print(f" {change['plan']}: ${change['old']} -> ${change['new']} ({direction} {abs(change['pct'])}%)")
        hiring = r['hiring_trends']
        if hiring:
            print("\nHiring:")
            for team, count in hiring.items():
                print(f" {team}: {count} positions")
def main():
    """Demo: register one competitor and print its intelligence report."""
    # Read the ScraperAPI key from the environment; never commit keys in
    # source (the original hard-coded a 'YOUR_KEY' placeholder).
    intel = CompetitiveIntel(api_key=os.environ.get('SCRAPER_API_KEY'))
    intel.add_competitor('c1', 'Competitor A', 'competitor-a.com')
    report = IntelReport(intel)
    r = report.generate('c1')
    if r:  # generate() returns None for an unknown competitor id
        report.display(r)


if __name__ == '__main__':
    # Guard the demo so importing this module has no side effects.
    main()
For monitoring dozens of competitors, ScraperAPI handles JavaScript rendering, ThorData provides residential proxies, and ScrapeOps can monitor the health of your scraping pipelines.
Follow for more Python business intelligence tutorials.
Top comments (0)