Price monitoring is one of the most practical applications of web scraping. Whether you're a shopper hunting deals, a seller tracking competitors, or a business monitoring market prices, automated price tracking saves hours of manual checking.
In this guide, I'll show you how to build a price monitoring pipeline for major e-commerce platforms.
The Price Monitoring Pipeline
A complete price monitoring system has four stages:
- Scrape — Collect current prices from target sites
- Store — Save price history in a database
- Detect — Identify price changes that matter
- Alert — Notify when prices drop below thresholds
Setting Up the Data Model
from dataclasses import dataclass
from datetime import datetime
import sqlite3
@dataclass
class PricePoint:
    """A single observed price for one product at one moment in time."""

    product_url: str      # product page URL; used as the lookup key in the DB
    product_name: str     # human-readable title scraped from the page
    price: float          # numeric price, no currency symbol
    currency: str         # currency code, e.g. "USD"
    platform: str         # source site: "amazon", "ebay", or "walmart"
    timestamp: datetime   # when the price was observed (datetime.now() — local time)
def init_database(db_path: str = "prices.db"):
    """Open the price-history database, creating the schema on first use.

    Ensures the ``prices`` table and its (product_url, timestamp) index
    exist, then returns the live connection. The caller owns the
    connection's lifetime.
    """
    db = sqlite3.connect(db_path)
    schema = (
        """
        CREATE TABLE IF NOT EXISTS prices (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            product_url TEXT NOT NULL,
            product_name TEXT,
            price REAL NOT NULL,
            currency TEXT DEFAULT 'USD',
            platform TEXT NOT NULL,
            timestamp TEXT NOT NULL
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_product_url
        ON prices(product_url, timestamp)
        """,
    )
    for statement in schema:
        db.execute(statement)
    db.commit()
    return db
def save_price(conn, price_point):
    """Persist one PricePoint as a new row in the prices table.

    Timestamps are stored as ISO-8601 text; the insert is committed
    immediately so each observation survives a crash.
    """
    row = (
        price_point.product_url,
        price_point.product_name,
        price_point.price,
        price_point.currency,
        price_point.platform,
        price_point.timestamp.isoformat(),
    )
    conn.execute(
        "INSERT INTO prices (product_url, product_name, price, currency, platform, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
        row,
    )
    conn.commit()
Scraping Product Prices
Each platform requires a different parsing approach. Here's a unified scraper:
import requests
from bs4 import BeautifulSoup
import re
def scrape_price(url, api_key):
    """Scrape price from any supported e-commerce URL.

    Fetches the page through the ScraperAPI proxy (JS rendering enabled)
    and dispatches to the platform-specific parser based on the URL's
    hostname. Returns a PricePoint, or None on any fetch/parse failure.
    """
    from urllib.parse import urlparse

    params = {
        "api_key": api_key,
        "url": url,
        "render": "true"
    }
    try:
        response = requests.get("https://api.scraperapi.com", params=params, timeout=60)
    except requests.RequestException:
        # A network failure should mean "no data", not a crashed caller loop.
        return None
    if response.status_code != 200:
        return None
    soup = BeautifulSoup(response.text, "html.parser")

    # Dispatch on the hostname, not a substring of the full URL: the old
    # `"amazon.com" in url` test also fired on URLs that merely mention
    # amazon.com in their path or query string.
    host = urlparse(url).netloc.lower()

    def belongs_to(domain):
        # Exact match or a true subdomain (e.g. www.amazon.com).
        return host == domain or host.endswith("." + domain)

    if belongs_to("amazon.com"):
        return parse_amazon(soup, url)
    if belongs_to("ebay.com"):
        return parse_ebay(soup, url)
    if belongs_to("walmart.com"):
        return parse_walmart(soup, url)
    return None
def parse_amazon(soup, url):
    """Extract a PricePoint from a parsed Amazon product page.

    Returns None when the title or price nodes are missing, or the price
    text cannot be converted to a number.
    """
    title = soup.select_one("#productTitle")
    price_whole = soup.select_one("span.a-price-whole")
    price_fraction = soup.select_one("span.a-price-fraction")
    if not (title and price_whole):
        return None
    # Amazon's "whole" span may or may not include a trailing decimal
    # point, so strip it and join the halves with an explicit ".".
    # The original f"{whole}{fraction}" concatenation produced e.g.
    # 123456.0 for $1,234.56 whenever the trailing "." was absent.
    whole = price_whole.get_text(strip=True).replace(",", "").rstrip(".")
    fraction = price_fraction.get_text(strip=True) if price_fraction else "00"
    try:
        price = float(f"{whole}.{fraction}")
    except ValueError:
        return None
    return PricePoint(
        product_url=url,
        product_name=title.get_text(strip=True),
        price=price,
        currency="USD",
        platform="amazon",
        timestamp=datetime.now()
    )
def parse_ebay(soup, url):
    """Extract a PricePoint from a parsed eBay listing page.

    Returns None when the title or price element is missing, or when the
    price text contains no recognizable amount.
    """
    title = soup.select_one("h1.x-item-title__mainTitle span")
    price_el = soup.select_one("div.x-price-primary span")
    if not title or not price_el:
        return None
    # Pull the first "1,234.56"-style amount out of text like "US $1,234.56".
    amount = re.search(r"[\d,]+\.\d{2}", price_el.get_text(strip=True))
    if not amount:
        return None
    return PricePoint(
        product_url=url,
        product_name=title.get_text(strip=True),
        price=float(amount.group().replace(",", "")),
        currency="USD",
        platform="ebay",
        timestamp=datetime.now()
    )
Using Ready-Made Scrapers
For production use, you can leverage ready-made Apify actors instead of maintaining your own parsers:
- eBay Scraper: apify.com/cryptosignals/ebay-scraper — extracts eBay listings with prices, shipping, seller info
- Walmart Scraper: apify.com/cryptosignals/walmart-scraper — collects Walmart product data including pricing and availability
These handle anti-bot measures and data extraction automatically.
Price Drop Detection
def check_price_drops(conn, threshold_pct=5.0):
    """Find products with significant price drops.

    Compares each product's most recent price with its second-most-recent
    one and returns a list of dicts for every drop of at least
    ``threshold_pct`` percent.
    """
    query = """
    WITH latest AS (
        SELECT product_url, product_name, price, platform,
               ROW_NUMBER() OVER (PARTITION BY product_url ORDER BY timestamp DESC) as rn
        FROM prices
    ),
    previous AS (
        SELECT product_url, price as prev_price,
               ROW_NUMBER() OVER (PARTITION BY product_url ORDER BY timestamp DESC) as rn
        FROM prices
    )
    SELECT l.product_url, l.product_name, l.price as current_price,
           p.prev_price, l.platform,
           ROUND((p.prev_price - l.price) / p.prev_price * 100, 1) as drop_pct
    FROM latest l
    JOIN previous p ON l.product_url = p.product_url AND p.rn = 2
    WHERE l.rn = 1
      AND l.price < p.prev_price
      AND (p.prev_price - l.price) / p.prev_price * 100 >= ?
    """
    # Column order matches the SELECT list above.
    keys = ("url", "name", "current_price", "previous_price", "platform", "drop_percent")
    return [dict(zip(keys, row)) for row in conn.execute(query, (threshold_pct,))]
Email Alerts
import smtplib
from email.mime.text import MIMEText
def send_price_alert(drops, email_to, *, smtp_host="smtp.gmail.com", smtp_port=587,
                     smtp_user="your_email", smtp_password="your_app_password"):
    """Email a summary of detected price drops.

    Does nothing when ``drops`` is empty. SMTP settings are keyword-only
    parameters (defaults match the original hardcoded values) so the
    credentials no longer have to be edited into the function body.
    NOTE: prefer loading smtp_password from an environment variable
    rather than passing a literal.
    """
    if not drops:
        return
    body = "Price drops detected:\n\n"
    for d in drops:
        body += f" {d['name']} ({d['platform']})\n"
        body += f" ${d['previous_price']} -> ${d['current_price']} ({d['drop_percent']}% off)\n"
        body += f" {d['url']}\n\n"
    msg = MIMEText(body)
    msg["Subject"] = f"Price Alert: {len(drops)} price drops detected"
    # A From header was missing before; many SMTP servers reject mail without one.
    msg["From"] = smtp_user
    msg["To"] = email_to
    with smtplib.SMTP(smtp_host, smtp_port) as server:
        server.starttls()
        server.login(smtp_user, smtp_password)
        server.send_message(msg)
Running on a Schedule
import time
def monitoring_loop(urls, api_key, check_interval=3600):
    """Scrape every URL forever, saving prices and alerting on drops.

    Runs one pass over ``urls`` every ``check_interval`` seconds. A
    failure scraping one product is logged and skipped instead of
    killing the whole loop (the original let any exception from
    scrape_price terminate monitoring). The DB connection is closed on
    exit (e.g. KeyboardInterrupt).
    """
    conn = init_database()
    try:
        while True:
            print(f"Checking {len(urls)} products...")
            for url in urls:
                try:
                    price_point = scrape_price(url, api_key)
                except Exception as exc:
                    # Keep monitoring the remaining products.
                    print(f" error scraping {url}: {exc}")
                    price_point = None
                if price_point:
                    save_price(conn, price_point)
                    print(f" {price_point.platform}: {price_point.product_name} = ${price_point.price}")
                time.sleep(2)  # polite delay between product fetches
            drops = check_price_drops(conn, threshold_pct=5.0)
            if drops:
                send_price_alert(drops, "your@email.com")
                print(f"Alert sent for {len(drops)} price drops!")
            print(f"Next check in {check_interval // 60} minutes")
            time.sleep(check_interval)
    finally:
        conn.close()
# Products to monitor — replace these placeholder URLs with real product pages.
urls = [
    "https://amazon.com/dp/B0EXAMPLE1",
    "https://ebay.com/itm/123456789",
    "https://walmart.com/ip/EXAMPLE",
]
Using Proxies for Scale
For monitoring hundreds of products, you'll need reliable proxy rotation to avoid blocks. ThorData provides residential proxies with geo-targeting, perfect for accessing localized pricing on e-commerce sites.
Conclusion
Price monitoring is a powerful, practical scraping application. Start with a few products, validate your parsers work, then scale up. For production deployments, consider using ready-made scrapers like the eBay and Walmart actors that handle the complexity for you.
Happy scraping!
Top comments (0)