DEV Community

Brad
Brad

Posted on

Python Web Scraping for Business Intelligence: Extract Competitor Prices Automatically

You're checking competitor pricing manually? That's 2 hours every week you'll never get back. Here's how to automate it with Python.

The Problem With Manual Price Monitoring

Every business needs to watch what competitors charge. But visiting 5-10 competitor sites weekly:

  • Takes 2-3 hours you could spend elsewhere
  • Is inconsistent (you forget, skip weeks, miss changes)
  • Gives you no historical data or trends

Python can do all this automatically, store results in a database, and email you when prices change.

The Simple Price Monitor

import os
import re
import smtplib
import sqlite3
from datetime import datetime
from email.mime.text import MIMEText

import httpx

# Database setup
def setup_db(db_path='prices.db'):
    """Open the price-history database, creating the schema if needed.

    Parameters:
        db_path: path to the sqlite database file (default 'prices.db';
                 pass ':memory:' for a throwaway in-memory database).

    Returns:
        An open sqlite3.Connection with the `prices` table guaranteed
        to exist (CREATE TABLE IF NOT EXISTS makes this idempotent).
    """
    conn = sqlite3.connect(db_path)
    conn.execute('''CREATE TABLE IF NOT EXISTS prices 
                   (id INTEGER PRIMARY KEY, 
                    product TEXT,
                    price REAL,
                    source TEXT,
                    timestamp TEXT)''')
    conn.commit()
    return conn

def save_price(conn, product, price, source):
    """Record one price observation, timestamped with the current time.

    Parameters:
        conn: open sqlite3.Connection with the `prices` table.
        product: product identifier used for history lookups.
        price: observed price.
        source: label for where the price was scraped from.
    """
    # Name the columns explicitly: the original positional
    # "VALUES (NULL, ?, ?, ?, ?)" silently breaks (or misfiles data)
    # the moment the table gains or reorders a column.
    conn.execute(
        "INSERT INTO prices (product, price, source, timestamp) VALUES (?, ?, ?, ?)",
        (product, price, source, datetime.now().isoformat()))
    conn.commit()

def get_last_price(conn, product, source):
    """Return the most recently recorded price for (product, source).

    Returns None when no observation exists yet. ISO-8601 timestamps
    sort lexicographically, so ORDER BY timestamp gives true recency.
    """
    query = "SELECT price FROM prices WHERE product=? AND source=? ORDER BY timestamp DESC LIMIT 1"
    result = conn.execute(query, (product, source)).fetchone()
    if result is None:
        return None
    return result[0]
Enter fullscreen mode Exit fullscreen mode

Scraping Prices From Real Sites

def scrape_price(url: str, price_pattern: str) -> float | None:
    """
    Extract a price from a webpage.

    Parameters:
        url: page to fetch.
        price_pattern: regex with one capture group isolating the number,
                       e.g. r'\$([\d,]+\.?\d*)'.

    Returns:
        The price as a float, or None if the fetch failed or the
        pattern did not match (best-effort: errors are logged, not raised).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    try:
        # follow_redirects: pricing pages frequently 301/302 to a
        # canonical URL; without it httpx returns the redirect response.
        r = httpx.get(url, headers=headers, timeout=10, follow_redirects=True)
        # Fail fast on 4xx/5xx — otherwise an error page would be
        # silently regex-searched and could yield a bogus "price".
        r.raise_for_status()
        match = re.search(price_pattern, r.text)
        if match:
            price_str = match.group(1).replace(',', '')
            return float(price_str)
    except Exception as e:
        print(f"Error scraping {url}: {e}")
    return None

# Example: Monitor SaaS competitor pricing pages
# Each entry drives one scrape per run of run_monitor():
#   product: unique key under which prices are stored/compared in the DB
#   url:     public pricing page to fetch
#   pattern: regex with ONE capture group isolating the numeric price
#   source:  competitor label stored alongside each price and shown in alerts
MONITORS = [
    {
        "product": "competitor_a_basic",
        "url": "https://competitor-a.com/pricing",
        "pattern": r'Basic.*?\$([\d]+)/mo',
        "source": "competitor_a"
    },
    {
        "product": "competitor_b_pro", 
        "url": "https://competitor-b.com/plans",
        "pattern": r'Pro Plan.*?\$([\d]+)',
        "source": "competitor_b"
    }
]
Enter fullscreen mode Exit fullscreen mode

Sending Price Change Alerts

def send_alert(product: str, old_price: float, new_price: float, source: str):
    """Email a notification that a monitored price changed.

    Credentials come from the SMTP_USER / SMTP_PASSWORD environment
    variables (ALERT_TO optionally overrides the recipient), falling
    back to the original placeholders — never commit real credentials
    to source control.
    """
    # Guard the baseline: a stored price of 0 would otherwise raise
    # ZeroDivisionError. Report the change as 0% in that degenerate case.
    change = ((new_price - old_price) / old_price) * 100 if old_price else 0.0
    direction = "increased" if change > 0 else "decreased"

    msg = MIMEText(f"""
Price Change Alert!

Product: {product}
Source: {source}
Old price: ${old_price:.2f}
New price: ${new_price:.2f}
Change: {abs(change):.1f}% {direction}

Check their pricing page to understand the change.
    """)
    msg['Subject'] = f"Price Alert: {product} {direction} by {abs(change):.0f}%"
    sender = os.environ.get("SMTP_USER", "you@gmail.com")
    msg['From'] = sender
    msg['To'] = os.environ.get("ALERT_TO", sender)

    # Use a Gmail App Password (regular passwords are rejected when
    # 2FA is enabled).
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender, os.environ.get("SMTP_PASSWORD", "your-app-password"))
        server.send_message(msg)

def run_monitor():
    """Scrape every configured monitor once, store results, alert on change.

    For each MONITORS entry: fetch the current price, persist it, and
    send an email alert when it differs from the last stored price.
    """
    conn = setup_db()
    try:
        for monitor in MONITORS:
            price = scrape_price(monitor['url'], monitor['pattern'])

            if price is None:
                print(f"Could not scrape {monitor['product']}")
                continue

            last_price = get_last_price(conn, monitor['product'], monitor['source'])
            save_price(conn, monitor['product'], price, monitor['source'])

            # `is not None`, not truthiness: a stored price of 0.0 is a
            # real prior observation and must still trigger comparison.
            # The 0.01 tolerance ignores float-representation noise.
            if last_price is not None and abs(price - last_price) > 0.01:
                print(f"Price changed: {monitor['product']} ${last_price} → ${price}")
                send_alert(monitor['product'], last_price, price, monitor['source'])
            else:
                print(f"No change: {monitor['product']} = ${price:.2f}")
    finally:
        # Close the connection even if scraping or alerting raises.
        conn.close()

# Allow direct execution (e.g. from cron) as well as importing the
# functions above without side effects.
if __name__ == "__main__":
    run_monitor()
Enter fullscreen mode Exit fullscreen mode

Run It Automatically With Cron

Add to your crontab (crontab -e):

# Check competitor prices every Monday at 9am
0 9 * * 1 /usr/bin/python3 /home/user/price_monitor.py >> /var/log/price_monitor.log 2>&1
Enter fullscreen mode Exit fullscreen mode

Building a Price History Dashboard

Want a simple CSV export for analysis?

import csv
from datetime import datetime, timedelta

def export_price_history(days=30, db_path='prices.db', out_path='price_history.csv'):
    """Export recent price observations to a CSV file.

    Parameters:
        days: lookback window in days (default 30).
        db_path: sqlite database to read (default 'prices.db').
        out_path: CSV file to write (default 'price_history.csv').
    """
    # ISO-8601 timestamps compare lexicographically, so a plain string
    # comparison against the cutoff is correct.
    cutoff = (datetime.now() - timedelta(days=days)).isoformat()

    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(
            "SELECT product, price, source, timestamp FROM prices WHERE timestamp > ? ORDER BY timestamp",
            (cutoff,)
        ).fetchall()
    finally:
        # Close even if the query fails (e.g. missing table).
        conn.close()

    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['product', 'price', 'source', 'date'])
        writer.writerows(rows)

    print(f"Exported {len(rows)} price records to {out_path}")
Enter fullscreen mode Exit fullscreen mode

What to Do With the Data

Once you have 2-3 weeks of price history:

  1. Spot trends: Are competitors raising or lowering prices?
  2. Identify sale cycles: Do they discount at end of quarter?
  3. Benchmark your pricing: Are you too expensive? Too cheap?
  4. React faster: Price changes often signal strategic shifts

Taking This Further

The basic pattern above handles static pages. For dynamic JavaScript-rendered pages, you'll need Playwright:

from playwright.async_api import async_playwright
import asyncio

async def scrape_js_price(url: str, selector: str) -> str | None:
    """Fetch `url` in headless Chromium and return the text of `selector`.

    Parameters:
        url: page to load (JavaScript is executed, unlike httpx.get).
        selector: CSS selector for the element holding the price.

    Returns:
        The element's text content (may be None for an empty element).

    Raises:
        playwright's TimeoutError if the selector does not appear
        within 5 seconds.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(url)
            await page.wait_for_selector(selector, timeout=5000)
            return await page.text_content(selector)
        finally:
            # Close in a finally block: the original leaked a Chromium
            # process whenever goto() or the selector wait raised.
            await browser.close()
Enter fullscreen mode Exit fullscreen mode

Want this all pre-built? I packaged 12 Python business automation scripts (including this price monitor, invoice automation, inventory tracking, and more) into a ready-to-use toolkit. It includes setup instructions, configuration templates, and email alert systems.

Python Business Automation Toolkit — $29

All scripts work out of the box with Python 3.8+. No complex dependencies.

Top comments (0)