DEV Community

agenthustler

Posted on

How to Build a Personal Shopper Bot with Price Comparison Scraping

Price comparison across retailers can save you hundreds per year. A personal shopper bot automates this by scraping prices, tracking history, and alerting you to deals. Here's how to build one.

Why Build a Price Bot?

Prices fluctuate constantly. A product might be $50 on one site and $35 on another. Prices drop for flash sales and rise before holidays. Automated monitoring catches savings you'd otherwise miss.

Setup

pip install requests beautifulsoup4 pandas schedule

Building the Price Scraper

Create a multi-retailer price scraper:

import requests
from bs4 import BeautifulSoup
import json
import re
from datetime import datetime

class PriceScraper:
    """Scrapes retailer product pages via the ScraperAPI proxy and extracts
    structured listing data (title, price, availability)."""

    def __init__(self, scraperapi_key, timeout=60):
        """
        Args:
            scraperapi_key: API key for api.scraperapi.com.
            timeout: Per-request timeout in seconds. Rendered pages can be
                slow, but without a timeout requests.get() can hang forever.
        """
        self.api_key = scraperapi_key
        self.base_url = "https://api.scraperapi.com"
        self.timeout = timeout

    def fetch(self, url, render=False):
        """Fetch *url* through ScraperAPI and return a parsed BeautifulSoup.

        render=True asks ScraperAPI to execute JavaScript first (needed for
        prices injected client-side). Raises requests.HTTPError on a 4xx/5xx
        response instead of silently parsing an error page as product HTML.
        """
        params = {
            "api_key": self.api_key,
            "url": url,
            "render": str(render).lower()
        }
        response = requests.get(self.base_url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")

    def scrape_amazon(self, url):
        """Extract title/price/rating/stock from an Amazon product page."""
        soup = self.fetch(url, render=True)

        title = soup.select_one("#productTitle")
        price = soup.select_one(".a-price .a-offscreen")
        rating = soup.select_one("#acrPopover")
        availability = soup.select_one("#availability span")

        return {
            "retailer": "Amazon",
            "title": title.text.strip() if title else "",
            # A missing price element parses as 0 — downstream code treats
            # that as "scrape failed", not as a real price.
            "price": self.parse_price(price.text if price else "0"),
            "rating": rating.get("title", "") if rating else "",
            "in_stock": "in stock" in (availability.text.lower() if availability else ""),
            "url": url,
            "scraped_at": datetime.now().isoformat()
        }

    def scrape_generic(self, url):
        """Best-effort scrape for any retailer using common CSS selectors."""
        soup = self.fetch(url, render=True)

        # Try common price selectors until one yields a positive price.
        price_selectors = [
            "[itemprop='price']", ".price", ".product-price",
            ".current-price", "#price", ".sale-price"
        ]

        price = None
        for selector in price_selectors:
            el = soup.select_one(selector)
            if el:
                # Prefer the machine-readable "content" attribute (microdata)
                # over the visible text when it exists.
                price = self.parse_price(el.get("content", el.text))
                if price > 0:
                    break

        title_selectors = [
            "[itemprop='name']", "h1.product-title",
            "h1.product-name", "#product-title", "h1"
        ]

        title = ""
        for selector in title_selectors:
            el = soup.select_one(selector)
            if el:
                title = el.text.strip()
                break

        return {
            "retailer": self.extract_domain(url),
            "title": title,
            "price": price or 0,
            "url": url,
            "scraped_at": datetime.now().isoformat()
        }

    @staticmethod
    def parse_price(text):
        """Extract the first numeric value from a price string.

        "$1,234.56" -> 1234.56. Returns 0 when no digits are found.
        """
        numbers = re.findall(r"\d+\.?\d*", text.replace(",", ""))
        return float(numbers[0]) if numbers else 0

    @staticmethod
    def extract_domain(url):
        """Return the URL's hostname without a leading "www." (e.g. "amazon.com")."""
        from urllib.parse import urlparse
        return urlparse(url).netloc.replace("www.", "")

# Module-level scraper instance; replace the placeholder with a real ScraperAPI key.
scraper = PriceScraper("YOUR_SCRAPERAPI_KEY")

Price History Tracking

import pandas as pd
import os

class PriceTracker:
    """Persists scraped prices to a CSV file and answers queries about the
    best current offer and the price trend per tracked product."""

    def __init__(self, scraper, history_file="price_history.csv"):
        """
        Args:
            scraper: PriceScraper-like object exposing scrape_amazon/scrape_generic.
            history_file: CSV path where observations are appended.
        """
        self.scraper = scraper
        self.history_file = history_file
        self.load_history()

    def load_history(self):
        """Load prior observations from disk, or start with an empty frame."""
        if os.path.exists(self.history_file):
            self.history = pd.read_csv(self.history_file)
        else:
            self.history = pd.DataFrame(
                columns=["product_id", "retailer", "title",
                         "price", "url", "scraped_at"])

    def track_product(self, product_id, urls):
        """Scrape every URL for *product_id*, append results to history, and
        persist to CSV. Failed URLs are logged and skipped.

        Returns the list of result dicts from this run (possibly empty).
        """
        from urllib.parse import urlparse

        results = []
        for url in urls:
            try:
                # Match on the hostname, not the whole URL, so a path that
                # merely contains "amazon" doesn't pick the wrong parser.
                if "amazon" in urlparse(url).netloc:
                    data = self.scraper.scrape_amazon(url)
                else:
                    data = self.scraper.scrape_generic(url)

                data["product_id"] = product_id
                results.append(data)
            except Exception as e:
                print(f"Error scraping {url}: {e}")

        # Skip the concat/save when every URL failed — concatenating an
        # empty frame is a warning-prone no-op in recent pandas.
        if results:
            new_data = pd.DataFrame(results)
            self.history = pd.concat([self.history, new_data], ignore_index=True)
            self.history.to_csv(self.history_file, index=False)

        return results

    def get_best_price(self, product_id):
        """Return the cheapest current offer as a dict, or None if there is
        no valid observation.

        Failed scrapes are stored with price 0; they are excluded here so a
        $0 sentinel can never "win" the price comparison.
        """
        product = self.history[
            (self.history["product_id"] == product_id)
            & (self.history["price"] > 0)
        ]
        if product.empty:
            return None

        # ISO-8601 timestamps sort correctly as plain strings, so the last
        # row per retailer is its most recent observation.
        latest = product.sort_values("scraped_at").groupby("retailer").last()
        best = latest.loc[latest["price"].idxmin()]

        return {
            "retailer": best.name,
            "price": best["price"],
            "url": best["url"]
        }

    def price_trend(self, product_id, retailer=None):
        """Summarize price movement for a product (optionally one retailer).

        Returns "Insufficient data" when fewer than two valid observations
        exist; otherwise a dict with current/lowest/highest prices and the
        percent change from the first to the latest observation.
        """
        # Drop failed scrapes (price 0); this also prevents a
        # ZeroDivisionError in the percent-change calculation below.
        product = self.history[
            (self.history["product_id"] == product_id)
            & (self.history["price"] > 0)
        ]
        if retailer:
            product = product[product["retailer"] == retailer]

        product = product.sort_values("scraped_at")

        if len(product) < 2:
            return "Insufficient data"

        first_price = product.iloc[0]["price"]
        last_price = product.iloc[-1]["price"]
        change = ((last_price - first_price) / first_price) * 100

        return {
            "current": last_price,
            "lowest": product["price"].min(),
            "highest": product["price"].max(),
            "change_pct": round(change, 1)
        }

# Module-level tracker wired to the scraper; loads/creates price_history.csv.
tracker = PriceTracker(scraper)

Setting Up Deal Alerts

import schedule
import time

def setup_watchlist():
    """Return the products to monitor.

    Maps each product id to its retailer URLs and the target price at
    which a deal alert should fire.
    """
    headphones = {
        "urls": [
            "https://amazon.com/dp/EXAMPLE1",
            "https://bestbuy.com/product/EXAMPLE1",
        ],
        "target_price": 200.00,
    }
    keyboard = {
        "urls": [
            "https://amazon.com/dp/EXAMPLE2",
            "https://newegg.com/product/EXAMPLE2",
        ],
        "target_price": 120.00,
    }
    return {"headphones": headphones, "keyboard": keyboard}

def check_prices():
    """Scrape every watchlist entry, record the new prices, and print a
    deal alert (when the target is hit) plus a trend summary per product."""
    for name, entry in setup_watchlist().items():
        tracker.track_product(name, entry["urls"])
        best = tracker.get_best_price(name)
        trend = tracker.price_trend(name)

        target = entry["target_price"]
        if best is not None and best["price"] <= target:
            print(f"DEAL ALERT: {name}")
            print(f"  Price: ${best['price']:.2f} at {best['retailer']}")
            print(f"  Target was: ${target:.2f}")
            print(f"  URL: {best['url']}")

        if isinstance(trend, dict):
            print(f"\n{name} trend: {trend['change_pct']:+.1f}% "
                  f"(low: ${trend['lowest']:.2f}, high: ${trend['highest']:.2f})")

# Re-check all watchlist prices every 6 hours.
schedule.every(6).hours.do(check_prices)
check_prices()  # Run immediately first

# Block forever, waking once a minute so `schedule` can fire any due jobs.
while True:
    schedule.run_pending()
    time.sleep(60)

Recommended Infrastructure

  • ScraperAPI handles JavaScript rendering and anti-bot protections on major retailers
  • ThorData residential proxies prevent IP blocks during frequent checks
  • ScrapeOps monitors your scraper uptime and success rates

Conclusion

A personal shopper bot turns price comparison from a manual chore into an automated advantage. Start with a few products you're watching, let the bot build price history, and set target prices for alerts. Over time, the savings add up significantly — especially for electronics and other items with volatile pricing.

Top comments (0)