DEV Community

agenthustler
agenthustler

Posted on

Building a Competitive Intelligence Dashboard with Web Scraping

Building a Competitive Intelligence Dashboard with Web Scraping

Understanding competitors' pricing changes, hiring patterns, and marketing strategies gives you a strategic advantage. Here's how to build a Python-powered competitive intelligence system.

Core Engine

import requests
from bs4 import BeautifulSoup
import sqlite3, json, hashlib
from datetime import datetime
import time, re

class CompetitiveIntel:
    """Scrapes competitor websites and stores findings in a local SQLite DB.

    Tracks two intelligence dimensions: pricing-page snapshots and job
    postings. An optional ScraperAPI key routes fetches through a
    JS-rendering proxy.
    """

    def __init__(self, db_path='intel.db', api_key=None):
        """Open (or create) the SQLite store and prepare an HTTP session.

        Args:
            db_path: Path to the SQLite database file.
            api_key: Optional ScraperAPI key; when set, fetches are proxied.
        """
        self.api_key = api_key
        self.db = sqlite3.connect(db_path)
        self.session = requests.Session()
        # A browser-like UA reduces the chance of being served a bot page.
        browser_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        self.session.headers['User-Agent'] = browser_ua
        self._init_db()

    def _init_db(self):
        """Create the schema on first run (idempotent via IF NOT EXISTS).

        Tables:
            competitors: one row per tracked competitor.
            intel: generic findings keyed by (competitor, content hash) so
                re-scraping the same item is deduplicated by the UNIQUE
                constraint.
            pricing: append-only snapshots of plan prices over time.
        """
        self.db.executescript('''
            CREATE TABLE IF NOT EXISTS competitors (
                id TEXT PRIMARY KEY, name TEXT, domain TEXT, category TEXT);
            CREATE TABLE IF NOT EXISTS intel (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                competitor_id TEXT, dimension TEXT,
                title TEXT, content TEXT, url TEXT,
                content_hash TEXT, detected_at DATETIME,
                UNIQUE(competitor_id, content_hash));
            CREATE TABLE IF NOT EXISTS pricing (
                competitor_id TEXT, plan TEXT,
                price REAL, features TEXT, captured_at DATETIME);
        ''')

    def _fetch(self, url):
        """GET *url*, via the ScraperAPI rendering proxy when a key is set.

        Returns the `requests.Response`; callers read `.text`.

        Bug fixed: the original interpolated the raw target URL into the
        proxy query string, so any '&' or '?' inside `url` was parsed as
        extra ScraperAPI parameters and the target was silently truncated.
        `params=` URL-encodes each value correctly. A timeout is added so a
        stalled site cannot hang the scraper forever.
        """
        if self.api_key:
            return self.session.get(
                'http://api.scraperapi.com',
                params={'api_key': self.api_key, 'url': url, 'render': 'true'},
                timeout=30)
        return self.session.get(url, timeout=30)

    def add_competitor(self, cid, name, domain, cat='direct'):
        self.db.execute('INSERT OR REPLACE INTO competitors VALUES (?,?,?,?)',
                       (cid, name, domain, cat))
        self.db.commit()

Pricing Intelligence

    def track_pricing(self, cid, url, plan_sel, price_sel):
        """Scrape a pricing page and append one snapshot row per plan.

        Args:
            cid: Competitor id (foreign key into `competitors`).
            url: Pricing page URL.
            plan_sel: CSS selector matching one element per plan card.
            price_sel: CSS selector for the price element inside a card.

        Returns:
            List of dicts: {'name', 'price', 'features'} for each plan found.

        Bug fixed: the original regex r'\$(\d+\.?\d*)' stopped at the first
        comma, so "$1,299" was captured as 1.0. Thousands separators are now
        accepted and stripped before conversion.
        """
        resp = self._fetch(url)
        soup = BeautifulSoup(resp.text, 'html.parser')
        plans = []
        for el in soup.select(plan_sel):
            name = el.select_one('h2, h3, .plan-name')
            price_el = el.select_one(price_sel)
            if not name or not price_el:
                continue  # skip cards missing either a name or a price
            match = re.search(r'\$([\d,]+(?:\.\d+)?)', price_el.get_text())
            # Fall back to 0.0 when no dollar amount is present ("Contact us").
            price = float(match.group(1).replace(',', '')) if match else 0.0
            features = [li.get_text(strip=True) for li in el.select('li')]
            plans.append({'name': name.get_text(strip=True), 'price': price, 'features': features})
            self.db.execute('INSERT INTO pricing VALUES (?,?,?,?,?)',
                (cid, plans[-1]['name'], price, json.dumps(features), datetime.now().isoformat()))
        self.db.commit()
        return plans

    def pricing_changes(self, cid):
        rows = self.db.execute(
            'SELECT plan, price, captured_at FROM pricing WHERE competitor_id=? ORDER BY captured_at DESC',
            (cid,)).fetchall()
        changes, seen = [], {}
        for plan, price, date in rows:
            if plan in seen and seen[plan] != price:
                changes.append({'plan': plan, 'old': price, 'new': seen[plan],
                    'pct': round((seen[plan]-price)/price*100, 1)})
            seen[plan] = price
        return changes

Job Posting Intelligence

    def track_hiring(self, cid, url, job_sel):
        """Scrape a careers page and record each posting as 'hiring' intel.

        Args:
            cid: Competitor id.
            url: Careers/jobs page URL.
            job_sel: CSS selector matching one element per job listing.

        Returns:
            List of job dicts with 'title', 'dept', and 'location' keys.

        Bug fixed: the dedupe hash covered only the title, so identical
        titles in different departments/locations collapsed into a single
        intel row. The hash now covers the whole job record (canonical JSON
        with sorted keys, so it is stable across dict orderings).
        """
        resp = self._fetch(url)
        soup = BeautifulSoup(resp.text, 'html.parser')
        jobs = []
        for el in soup.select(job_sel):
            title = el.select_one('h2, h3, a, .job-title')
            dept = el.select_one('.department, .team')
            loc = el.select_one('.location')
            if not title:
                continue  # not a job card we can use
            job = {'title': title.get_text(strip=True),
                   'dept': dept.get_text(strip=True) if dept else 'Unknown',
                   'location': loc.get_text(strip=True) if loc else 'Unknown'}
            jobs.append(job)
            h = hashlib.sha256(json.dumps(job, sort_keys=True).encode()).hexdigest()
            try:
                self.db.execute('INSERT INTO intel VALUES (NULL,?,?,?,?,?,?,?)',
                    (cid, 'hiring', job['title'], json.dumps(job), '', h, datetime.now().isoformat()))
            except sqlite3.IntegrityError:
                pass  # already recorded (UNIQUE on competitor_id+hash) — best effort
        self.db.commit()
        return jobs

    def hiring_trends(self, cid):
        rows = self.db.execute('''
            SELECT content FROM intel WHERE competitor_id=? AND dimension='hiring'
            AND detected_at > datetime('now','-30 days')
        ''', (cid,)).fetchall()
        depts = {}
        for r in rows:
            d = json.loads(r[0]).get('dept', 'Unknown')
            depts[d] = depts.get(d, 0) + 1
        return dict(sorted(depts.items(), key=lambda x: x[1], reverse=True))

Reports

class IntelReport:
    """Formats the data a CompetitiveIntel engine has collected."""

    def __init__(self, engine):
        # Keep a handle on the engine; reports query its db and methods.
        self.e = engine

    def generate(self, cid):
        """Build a summary dict for competitor *cid*; None if unknown."""
        row = self.e.db.execute('SELECT name,domain FROM competitors WHERE id=?', (cid,)).fetchone()
        if row is None:
            return None
        name, domain = row
        return {
            'competitor': name,
            'domain': domain,
            'pricing_changes': self.e.pricing_changes(cid),
            'hiring_trends': self.e.hiring_trends(cid),
        }

    def display(self, r):
        """Pretty-print a report dict produced by generate()."""
        print(f"\nIntelligence: {r['competitor']}")
        print("=" * 50)
        if r['pricing_changes']:
            print("\nPricing Changes:")
            for move in r['pricing_changes']:
                direction = 'UP' if move['pct'] > 0 else 'DOWN'
                print(f"  {move['plan']}: ${move['old']} -> ${move['new']} ({direction} {abs(move['pct'])}%)")
        if r['hiring_trends']:
            print("\nHiring:")
            for dept, n in r['hiring_trends'].items():
                print(f"  {dept}: {n} positions")

def main():
    """Demo: register a competitor and print its intelligence report."""
    # NOTE(review): 'YOUR_KEY' is a placeholder — supply a real ScraperAPI key.
    intel = CompetitiveIntel(api_key='YOUR_KEY')
    intel.add_competitor('c1', 'Competitor A', 'competitor-a.com')
    report = IntelReport(intel)
    r = report.generate('c1')
    report.display(r)


# Guard the demo so importing this module does not create a database
# or print a report as a side effect.
if __name__ == '__main__':
    main()

For monitoring dozens of competitors at scale, a rendering proxy such as ScraperAPI handles JavaScript-heavy pages, residential proxy providers like ThorData reduce blocking, and a tool like ScrapeOps can monitor your scraping pipelines.


Follow for more Python business intelligence tutorials.

Top comments (0)