DEV Community

agenthustler

Posted on

How to Build a Personal Shopper Bot with Price Comparison Scraping

Price comparison across retailers can save you hundreds per year. A personal shopper bot automates this by scraping prices, tracking history, and alerting you to deals. Here's how to build one.

Why Build a Price Bot?

Prices fluctuate constantly. A product might be $50 on one site and $35 on another. Prices drop for flash sales and rise before holidays. Automated monitoring catches savings you'd otherwise miss.

Setup

pip install requests beautifulsoup4 pandas schedule

Building the Price Scraper

Create a multi-retailer price scraper:

import requests
from bs4 import BeautifulSoup
import json
import re
from datetime import datetime

class PriceScraper:
    """Scrapes retailer product pages via the ScraperAPI proxy and extracts
    structured listing data (title, price, availability)."""

    def __init__(self, scraperapi_key, timeout=60):
        """
        Args:
            scraperapi_key: API key for api.scraperapi.com.
            timeout: Per-request timeout in seconds. Rendered pages can be
                slow, but without a timeout requests.get() can hang forever.
        """
        self.api_key = scraperapi_key
        self.base_url = "https://api.scraperapi.com"
        self.timeout = timeout

    def fetch(self, url, render=False):
        """Fetch *url* through ScraperAPI and return a parsed BeautifulSoup.

        render=True asks ScraperAPI to execute JavaScript first (needed for
        prices injected client-side). Raises requests.HTTPError on a 4xx/5xx
        response instead of silently parsing an error page as product HTML.
        """
        params = {
            "api_key": self.api_key,
            "url": url,
            "render": str(render).lower()
        }
        response = requests.get(self.base_url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")

    def scrape_amazon(self, url):
        """Extract title/price/rating/stock from an Amazon product page."""
        soup = self.fetch(url, render=True)

        title = soup.select_one("#productTitle")
        price = soup.select_one(".a-price .a-offscreen")
        rating = soup.select_one("#acrPopover")
        availability = soup.select_one("#availability span")

        return {
            "retailer": "Amazon",
            "title": title.text.strip() if title else "",
            # A missing price element parses as 0 — downstream code treats
            # that as "scrape failed", not as a real price.
            "price": self.parse_price(price.text if price else "0"),
            "rating": rating.get("title", "") if rating else "",
            "in_stock": "in stock" in (availability.text.lower() if availability else ""),
            "url": url,
            "scraped_at": datetime.now().isoformat()
        }

    def scrape_generic(self, url):
        """Best-effort scrape for any retailer using common CSS selectors."""
        soup = self.fetch(url, render=True)

        # Try common price selectors until one yields a positive price.
        price_selectors = [
            "[itemprop='price']", ".price", ".product-price",
            ".current-price", "#price", ".sale-price"
        ]

        price = None
        for selector in price_selectors:
            el = soup.select_one(selector)
            if el:
                # Prefer the machine-readable "content" attribute (microdata)
                # over the visible text when it exists.
                price = self.parse_price(el.get("content", el.text))
                if price > 0:
                    break

        title_selectors = [
            "[itemprop='name']", "h1.product-title",
            "h1.product-name", "#product-title", "h1"
        ]

        title = ""
        for selector in title_selectors:
            el = soup.select_one(selector)
            if el:
                title = el.text.strip()
                break

        return {
            "retailer": self.extract_domain(url),
            "title": title,
            "price": price or 0,
            "url": url,
            "scraped_at": datetime.now().isoformat()
        }

    @staticmethod
    def parse_price(text):
        """Extract the first numeric value from a price string.

        "$1,234.56" -> 1234.56. Returns 0 when no digits are found.
        """
        numbers = re.findall(r"\d+\.?\d*", text.replace(",", ""))
        return float(numbers[0]) if numbers else 0

    @staticmethod
    def extract_domain(url):
        """Return the URL's hostname without a leading "www." (e.g. "amazon.com")."""
        from urllib.parse import urlparse
        return urlparse(url).netloc.replace("www.", "")

# Module-level scraper instance; replace the placeholder with a real ScraperAPI key.
scraper = PriceScraper("YOUR_SCRAPERAPI_KEY")

Price History Tracking

import pandas as pd
import os

class PriceTracker:
    """Persists scraped prices to a CSV file and answers queries about the
    best current offer and the price trend per tracked product."""

    def __init__(self, scraper, history_file="price_history.csv"):
        """
        Args:
            scraper: PriceScraper-like object exposing scrape_amazon/scrape_generic.
            history_file: CSV path where observations are appended.
        """
        self.scraper = scraper
        self.history_file = history_file
        self.load_history()

    def load_history(self):
        """Load prior observations from disk, or start with an empty frame."""
        if os.path.exists(self.history_file):
            self.history = pd.read_csv(self.history_file)
        else:
            self.history = pd.DataFrame(
                columns=["product_id", "retailer", "title",
                         "price", "url", "scraped_at"])

    def track_product(self, product_id, urls):
        """Scrape every URL for *product_id*, append results to history, and
        persist to CSV. Failed URLs are logged and skipped.

        Returns the list of result dicts from this run (possibly empty).
        """
        from urllib.parse import urlparse

        results = []
        for url in urls:
            try:
                # Match on the hostname, not the whole URL, so a path that
                # merely contains "amazon" doesn't pick the wrong parser.
                if "amazon" in urlparse(url).netloc:
                    data = self.scraper.scrape_amazon(url)
                else:
                    data = self.scraper.scrape_generic(url)

                data["product_id"] = product_id
                results.append(data)
            except Exception as e:
                print(f"Error scraping {url}: {e}")

        # Skip the concat/save when every URL failed — concatenating an
        # empty frame is a warning-prone no-op in recent pandas.
        if results:
            new_data = pd.DataFrame(results)
            self.history = pd.concat([self.history, new_data], ignore_index=True)
            self.history.to_csv(self.history_file, index=False)

        return results

    def get_best_price(self, product_id):
        """Return the cheapest current offer as a dict, or None if there is
        no valid observation.

        Failed scrapes are stored with price 0; they are excluded here so a
        $0 sentinel can never "win" the price comparison.
        """
        product = self.history[
            (self.history["product_id"] == product_id)
            & (self.history["price"] > 0)
        ]
        if product.empty:
            return None

        # ISO-8601 timestamps sort correctly as plain strings, so the last
        # row per retailer is its most recent observation.
        latest = product.sort_values("scraped_at").groupby("retailer").last()
        best = latest.loc[latest["price"].idxmin()]

        return {
            "retailer": best.name,
            "price": best["price"],
            "url": best["url"]
        }

    def price_trend(self, product_id, retailer=None):
        """Summarize price movement for a product (optionally one retailer).

        Returns "Insufficient data" when fewer than two valid observations
        exist; otherwise a dict with current/lowest/highest prices and the
        percent change from the first to the latest observation.
        """
        # Drop failed scrapes (price 0); this also prevents a
        # ZeroDivisionError in the percent-change calculation below.
        product = self.history[
            (self.history["product_id"] == product_id)
            & (self.history["price"] > 0)
        ]
        if retailer:
            product = product[product["retailer"] == retailer]

        product = product.sort_values("scraped_at")

        if len(product) < 2:
            return "Insufficient data"

        first_price = product.iloc[0]["price"]
        last_price = product.iloc[-1]["price"]
        change = ((last_price - first_price) / first_price) * 100

        return {
            "current": last_price,
            "lowest": product["price"].min(),
            "highest": product["price"].max(),
            "change_pct": round(change, 1)
        }

# Module-level tracker wired to the scraper; loads/creates price_history.csv.
tracker = PriceTracker(scraper)

Setting Up Deal Alerts

import schedule
import time

def setup_watchlist():
    """Return the products to monitor.

    Maps each product id to its retailer URLs and the target price at
    which a deal alert should fire.
    """
    headphones = {
        "urls": [
            "https://amazon.com/dp/EXAMPLE1",
            "https://bestbuy.com/product/EXAMPLE1",
        ],
        "target_price": 200.00,
    }
    keyboard = {
        "urls": [
            "https://amazon.com/dp/EXAMPLE2",
            "https://newegg.com/product/EXAMPLE2",
        ],
        "target_price": 120.00,
    }
    return {"headphones": headphones, "keyboard": keyboard}

def check_prices():
    """Scrape every watchlist entry, record the new prices, and print a
    deal alert (when the target is hit) plus a trend summary per product."""
    for name, entry in setup_watchlist().items():
        tracker.track_product(name, entry["urls"])
        best = tracker.get_best_price(name)
        trend = tracker.price_trend(name)

        target = entry["target_price"]
        if best is not None and best["price"] <= target:
            print(f"DEAL ALERT: {name}")
            print(f"  Price: ${best['price']:.2f} at {best['retailer']}")
            print(f"  Target was: ${target:.2f}")
            print(f"  URL: {best['url']}")

        if isinstance(trend, dict):
            print(f"\n{name} trend: {trend['change_pct']:+.1f}% "
                  f"(low: ${trend['lowest']:.2f}, high: ${trend['highest']:.2f})")

# Re-check all watchlist prices every 6 hours.
schedule.every(6).hours.do(check_prices)
check_prices()  # Run immediately first

# Block forever, waking once a minute so `schedule` can fire any due jobs.
while True:
    schedule.run_pending()
    time.sleep(60)

Recommended Infrastructure

  • ScraperAPI handles JavaScript rendering and anti-bot protections on major retailers
  • ThorData residential proxies prevent IP blocks during frequent checks
  • ScrapeOps monitors your scraper uptime and success rates

Conclusion

A personal shopper bot turns price comparison from a manual chore into an automated advantage. Start with a few products you're watching, let the bot build price history, and set target prices for alerts. Over time, the savings add up significantly — especially for electronics and other items with volatile pricing.

Top comments (0)