DEV Community

agenthustler
agenthustler

Posted on

E-commerce Price Monitoring: Scraping Amazon, eBay, Walmart for Price Alerts

Price monitoring is one of the most practical applications of web scraping. Whether you're a shopper hunting deals, a seller tracking competitors, or a business monitoring market prices, automated price tracking saves hours of manual checking.

In this guide, I'll show you how to build a price monitoring pipeline for major e-commerce platforms.

The Price Monitoring Pipeline

A complete price monitoring system has four stages:

  1. Scrape — Collect current prices from target sites
  2. Store — Save price history in a database
  3. Detect — Identify price changes that matter
  4. Alert — Notify when prices drop below thresholds

Setting Up the Data Model

from dataclasses import dataclass
from datetime import datetime
import sqlite3

@dataclass
class PricePoint:
    """One observed price for one product at one moment in time.

    Rows of this shape are persisted to the ``prices`` table by
    ``save_price`` and later compared by ``check_price_drops``.
    """

    product_url: str       # product page URL; used as the product's identity key
    product_name: str      # human-readable title scraped from the page
    price: float           # numeric price (currency units, e.g. dollars)
    currency: str          # ISO-ish currency code; scrapers below always set "USD"
    platform: str          # source marketplace: "amazon", "ebay", or "walmart"
    timestamp: datetime    # when the price was observed (stored as ISO-8601 text)

def init_database(db_path: str = "prices.db"):
    """Open (creating if necessary) the SQLite price-history database.

    Ensures the ``prices`` table and its lookup index exist, commits the
    schema, and returns the open connection for the caller to reuse.
    """
    conn = sqlite3.connect(db_path)

    # Schema is idempotent: IF NOT EXISTS makes repeated startups safe.
    table_sql = """
        CREATE TABLE IF NOT EXISTS prices (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            product_url TEXT NOT NULL,
            product_name TEXT,
            price REAL NOT NULL,
            currency TEXT DEFAULT 'USD',
            platform TEXT NOT NULL,
            timestamp TEXT NOT NULL
        )
    """
    # Composite index supports the per-product, newest-first scans used by
    # the drop-detection query.
    index_sql = """
        CREATE INDEX IF NOT EXISTS idx_product_url 
        ON prices(product_url, timestamp)
    """
    conn.execute(table_sql)
    conn.execute(index_sql)
    conn.commit()
    return conn

def save_price(conn, price_point):
    """Append a single price observation to the ``prices`` table.

    ``price_point`` only needs the PricePoint attributes read below;
    the timestamp is stored as ISO-8601 text. Commits immediately so
    each observation survives a crash of the monitoring loop.
    """
    row = (
        price_point.product_url,
        price_point.product_name,
        price_point.price,
        price_point.currency,
        price_point.platform,
        price_point.timestamp.isoformat(),
    )
    conn.execute(
        "INSERT INTO prices (product_url, product_name, price, currency, platform, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
        row,
    )
    conn.commit()
Enter fullscreen mode Exit fullscreen mode

Scraping Product Prices

Each platform requires a different parsing approach. Here's a unified scraper:

import requests
from bs4 import BeautifulSoup
import re

def scrape_price(url, api_key):
    """Scrape price from any supported e-commerce URL.

    Fetches the page through the ScraperAPI proxy (with JS rendering),
    then delegates to the platform-specific parser. Returns the parser's
    PricePoint, or None on HTTP failure or an unsupported domain.
    """
    response = requests.get(
        "https://api.scraperapi.com",
        params={"api_key": api_key, "url": url, "render": "true"},
        timeout=60,
    )
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    # Ordered dispatch: first matching domain substring wins.
    dispatch = (
        ("amazon.com", parse_amazon),
        ("ebay.com", parse_ebay),
        ("walmart.com", parse_walmart),
    )
    for domain, parser in dispatch:
        if domain in url:
            return parser(soup, url)
    return None

def parse_amazon(soup, url):
    """Extract a PricePoint from a parsed Amazon product page.

    Amazon splits the price across two spans: the whole-dollar part
    (``a-price-whole``, e.g. "1,234" — sometimes with a trailing ".")
    and the cents (``a-price-fraction``, e.g. "99"). Returns None when
    either the title or the whole-price element is missing.
    """
    title = soup.select_one("#productTitle")
    price_whole = soup.select_one("span.a-price-whole")
    price_fraction = soup.select_one("span.a-price-fraction")

    if title and price_whole:
        # Normalize the dollar part: drop thousands separators and any
        # trailing decimal point Amazon sometimes includes in the span text.
        whole = price_whole.get_text(strip=True).replace(",", "").rstrip(".")
        fraction = price_fraction.get_text(strip=True) if price_fraction else "00"
        # BUG FIX: the original did float(f"{whole}{fraction}") with no
        # decimal point, turning $1,234.99 into 123499.0 whenever the
        # whole-part span carried no trailing ".". Join explicitly with ".".
        price = float(f"{whole}.{fraction}")

        return PricePoint(
            product_url=url,
            product_name=title.get_text(strip=True),
            price=price,
            currency="USD",
            platform="amazon",
            timestamp=datetime.now()
        )
    return None

def parse_ebay(soup, url):
    """Extract a PricePoint from a parsed eBay listing page.

    Pulls the title and primary price element, then matches a decimal
    amount (e.g. "1,234.56") inside the price text. Returns None when
    any piece is missing.
    """
    title = soup.select_one("h1.x-item-title__mainTitle span")
    price_el = soup.select_one("div.x-price-primary span")

    # Guard clauses: bail out as soon as a required element is absent.
    if not title or not price_el:
        return None

    match = re.search(r"[\d,]+\.\d{2}", price_el.get_text(strip=True))
    if match is None:
        return None

    return PricePoint(
        product_url=url,
        product_name=title.get_text(strip=True),
        price=float(match.group().replace(",", "")),
        currency="USD",
        platform="ebay",
        timestamp=datetime.now(),
    )
Enter fullscreen mode Exit fullscreen mode

Using Ready-Made Scrapers

For production use, you can leverage ready-made Apify actors — such as dedicated Amazon, eBay, and Walmart price scrapers — instead of maintaining your own parsers.

These actors handle anti-bot measures and data extraction automatically.

Price Drop Detection

def check_price_drops(conn, threshold_pct=5.0):
    """Find products with significant price drops.

    Compares each product's most recent price (row_number 1, newest
    first) against its immediately preceding price (row_number 2) and
    returns dicts for every drop of at least ``threshold_pct`` percent.
    Products with fewer than two observations are skipped by the join.
    """
    cursor = conn.execute("""
        WITH latest AS (
            SELECT product_url, product_name, price, platform,
                   ROW_NUMBER() OVER (PARTITION BY product_url ORDER BY timestamp DESC) as rn
            FROM prices
        ),
        previous AS (
            SELECT product_url, price as prev_price,
                   ROW_NUMBER() OVER (PARTITION BY product_url ORDER BY timestamp DESC) as rn
            FROM prices
        )
        SELECT l.product_url, l.product_name, l.price as current_price,
               p.prev_price, l.platform,
               ROUND((p.prev_price - l.price) / p.prev_price * 100, 1) as drop_pct
        FROM latest l
        JOIN previous p ON l.product_url = p.product_url AND p.rn = 2
        WHERE l.rn = 1
          AND l.price < p.prev_price
          AND (p.prev_price - l.price) / p.prev_price * 100 >= ?
    """, (threshold_pct,))

    # Column order matches the SELECT list above.
    keys = ("url", "name", "current_price", "previous_price", "platform", "drop_percent")
    return [dict(zip(keys, row)) for row in cursor]
Enter fullscreen mode Exit fullscreen mode

Email Alerts

import smtplib
from email.mime.text import MIMEText

def send_price_alert(drops, email_to):
    """Email a plain-text summary of detected price drops via Gmail SMTP.

    ``drops`` is the list of dicts produced by check_price_drops; an
    empty list sends nothing. Credentials are placeholders — replace
    "your_email" / "your_app_password" before running.
    """
    if not drops:
        return

    # Assemble the body once with join rather than repeated +=.
    parts = ["Price drops detected:\n\n"]
    for d in drops:
        parts.append(f"  {d['name']} ({d['platform']})\n")
        parts.append(f"  ${d['previous_price']} -> ${d['current_price']} ({d['drop_percent']}% off)\n")
        parts.append(f"  {d['url']}\n\n")

    msg = MIMEText("".join(parts))
    msg["Subject"] = f"Price Alert: {len(drops)} price drops detected"
    msg["To"] = email_to

    with smtplib.SMTP("smtp.gmail.com", 587) as smtp:
        smtp.starttls()
        smtp.login("your_email", "your_app_password")
        smtp.send_message(msg)
Enter fullscreen mode Exit fullscreen mode

Running on a Schedule

import time

def monitoring_loop(urls, api_key, check_interval=3600):
    """Run the scrape → store → detect → alert cycle forever.

    Every ``check_interval`` seconds: scrape each URL, persist any
    result, then email an alert if any product dropped 5% or more.
    A 2-second pause between requests keeps the scrape rate polite.
    """
    conn = init_database()

    while True:
        print(f"Checking {len(urls)} products...")
        for url in urls:
            point = scrape_price(url, api_key)
            if point:
                save_price(conn, point)
                print(f"  {point.platform}: {point.product_name} = ${point.price}")
            time.sleep(2)  # throttle between requests

        drops = check_price_drops(conn, threshold_pct=5.0)
        if drops:
            send_price_alert(drops, "your@email.com")
            print(f"Alert sent for {len(drops)} price drops!")

        print(f"Next check in {check_interval // 60} minutes")
        time.sleep(check_interval)

# Example product URLs to monitor — replace these placeholders with the
# real listings you want tracked before starting monitoring_loop().
urls = [
    "https://amazon.com/dp/B0EXAMPLE1",
    "https://ebay.com/itm/123456789",
    "https://walmart.com/ip/EXAMPLE",
]
Enter fullscreen mode Exit fullscreen mode

Using Proxies for Scale

For monitoring hundreds of products, you'll need reliable proxy rotation to avoid blocks. ThorData provides residential proxies with geo-targeting, perfect for accessing localized pricing on e-commerce sites.

Conclusion

Price monitoring is a powerful, practical scraping application. Start with a few products, validate your parsers work, then scale up. For production deployments, consider using ready-made scrapers like the eBay and Walmart actors that handle the complexity for you.

Happy scraping!

Top comments (0)