Price comparison across retailers can save you hundreds per year. A personal shopper bot automates this by scraping prices, tracking history, and alerting you to deals. Here's how to build one.
Why Build a Price Bot?
Prices fluctuate constantly. A product might be $50 on one site and $35 on another. Prices drop for flash sales and rise before holidays. Automated monitoring catches savings you'd otherwise miss.
Setup
pip install requests beautifulsoup4 pandas schedule
Building the Price Scraper
Create a multi-retailer price scraper:
import requests
from bs4 import BeautifulSoup
import json
import re
from datetime import datetime
class PriceScraper:
    """Scrape product listings from retail sites via the ScraperAPI proxy.

    Amazon pages get a dedicated parser; every other retailer falls back
    to a list of common CSS selectors (see scrape_generic).
    """

    def __init__(self, scraperapi_key):
        self.api_key = scraperapi_key
        self.base_url = "https://api.scraperapi.com"

    def fetch(self, url, render=False, timeout=60):
        """Fetch *url* through ScraperAPI and return parsed BeautifulSoup.

        render=True asks ScraperAPI to execute JavaScript before
        returning the page (needed for retailers that inject prices
        client-side). *timeout* bounds the proxied request so the bot
        cannot hang forever on a stalled connection.
        """
        params = {
            "api_key": self.api_key,
            "url": url,
            "render": str(render).lower(),
        }
        response = requests.get(self.base_url, params=params, timeout=timeout)
        # Fail loudly on HTTP errors instead of silently parsing an
        # error page into an empty result with price 0.
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")

    def scrape_amazon(self, url):
        """Extract title/price/rating/stock from an Amazon product page."""
        soup = self.fetch(url, render=True)
        title = soup.select_one("#productTitle")
        price = soup.select_one(".a-price .a-offscreen")
        rating = soup.select_one("#acrPopover")
        availability = soup.select_one("#availability span")
        return {
            "retailer": "Amazon",
            "title": title.text.strip() if title else "",
            "price": self.parse_price(price.text if price else "0"),
            "rating": rating.get("title", "") if rating else "",
            "in_stock": "in stock" in (availability.text.lower() if availability else ""),
            "url": url,
            "scraped_at": datetime.now().isoformat(),
        }

    def scrape_generic(self, url):
        """Best-effort extraction for unknown retailers via common selectors."""
        soup = self.fetch(url, render=True)
        # Try common price selectors; keep the first selector that yields
        # a positive price so zero/placeholder matches are skipped.
        price_selectors = [
            "[itemprop='price']", ".price", ".product-price",
            ".current-price", "#price", ".sale-price",
        ]
        price = None
        for selector in price_selectors:
            el = soup.select_one(selector)
            if el:
                # Prefer the machine-readable content attribute when present.
                price = self.parse_price(el.get("content", el.text))
                if price > 0:
                    break
        title_selectors = [
            "[itemprop='name']", "h1.product-title",
            "h1.product-name", "#product-title", "h1",
        ]
        title = ""
        for selector in title_selectors:
            el = soup.select_one(selector)
            if el:
                title = el.text.strip()
                break
        return {
            "retailer": self.extract_domain(url),
            "title": title,
            "price": price or 0,
            "url": url,
            "scraped_at": datetime.now().isoformat(),
        }

    @staticmethod
    def parse_price(text):
        """Return the first number found in *text* as a float (0 if none)."""
        numbers = re.findall(r"\d+\.?\d*", text.replace(",", ""))
        return float(numbers[0]) if numbers else 0

    @staticmethod
    def extract_domain(url):
        """Return the hostname of *url* without a leading 'www.'."""
        from urllib.parse import urlparse
        return urlparse(url).netloc.replace("www.", "")


scraper = PriceScraper("YOUR_SCRAPERAPI_KEY")
Price History Tracking
import pandas as pd
import os
class PriceTracker:
    """Persist scraped prices to CSV and answer best-price/trend queries.

    The history file accumulates one row per (product, retailer, scrape)
    so price trends can be computed across runs.
    """

    def __init__(self, scraper, history_file="price_history.csv"):
        self.scraper = scraper
        self.history_file = history_file
        self.load_history()

    def load_history(self):
        """Load prior history from disk, or start with an empty frame."""
        if os.path.exists(self.history_file):
            self.history = pd.read_csv(self.history_file)
        else:
            self.history = pd.DataFrame(
                columns=["product_id", "retailer", "title",
                         "price", "url", "scraped_at"])

    def track_product(self, product_id, urls):
        """Scrape every URL for *product_id* and append results to history.

        Scrape failures are logged and skipped so one bad retailer does
        not abort the whole product. Returns the list of scraped rows.
        """
        results = []
        for url in urls:
            try:
                # Route by hostname, not raw substring, so a non-Amazon
                # URL that merely mentions "amazon" in its path is not
                # sent to the Amazon-specific parser.
                if "amazon" in self.scraper.extract_domain(url):
                    data = self.scraper.scrape_amazon(url)
                else:
                    data = self.scraper.scrape_generic(url)
                data["product_id"] = product_id
                results.append(data)
            except Exception as e:
                print(f"Error scraping {url}: {e}")
        # Only touch the history (and rewrite the file) when something
        # was actually scraped; concatenating an empty frame is wasted
        # work and raises a FutureWarning on recent pandas.
        if results:
            new_data = pd.DataFrame(results)
            self.history = pd.concat([self.history, new_data],
                                     ignore_index=True)
            self.history.to_csv(self.history_file, index=False)
        return results

    def get_best_price(self, product_id):
        """Return the cheapest current offer across retailers, or None.

        Uses each retailer's most recent scrape; rows with price <= 0
        (failed extractions) are ignored so a broken scrape can never
        masquerade as the best deal.
        """
        product = self.history[self.history["product_id"] == product_id]
        product = product[product["price"] > 0]
        if product.empty:
            return None
        latest = product.sort_values("scraped_at").groupby("retailer").last()
        best = latest.loc[latest["price"].idxmin()]
        return {
            "retailer": best.name,
            "price": best["price"],
            "url": best["url"]
        }

    def price_trend(self, product_id, retailer=None):
        """Summarize price movement for a product (optionally one retailer).

        Returns a dict with current/lowest/highest price and the percent
        change from the first to the latest scrape, or the string
        "Insufficient data" when fewer than two observations exist.
        """
        product = self.history[self.history["product_id"] == product_id]
        if retailer:
            product = product[product["retailer"] == retailer]
        product = product.sort_values("scraped_at")
        if len(product) < 2:
            return "Insufficient data"
        first_price = product.iloc[0]["price"]
        last_price = product.iloc[-1]["price"]
        change = ((last_price - first_price) / first_price) * 100
        return {
            "current": last_price,
            "lowest": product["price"].min(),
            "highest": product["price"].max(),
            "change_pct": round(change, 1)
        }
# Module-level tracker instance shared by the scheduled price checks.
tracker = PriceTracker(scraper)
Setting Up Deal Alerts
import schedule
import time
def setup_watchlist():
    """Return the products to monitor.

    Maps each product id to the retailer URLs to scrape and the price
    at which a deal alert should fire.
    """
    entries = [
        ("headphones", 200.00, [
            "https://amazon.com/dp/EXAMPLE1",
            "https://bestbuy.com/product/EXAMPLE1",
        ]),
        ("keyboard", 120.00, [
            "https://amazon.com/dp/EXAMPLE2",
            "https://newegg.com/product/EXAMPLE2",
        ]),
    ]
    return {
        pid: {"urls": urls, "target_price": target}
        for pid, target, urls in entries
    }
def check_prices():
    """Scrape every watched product, record the prices, and print a deal
    alert whenever the cheapest current offer meets its target price."""
    for name, entry in setup_watchlist().items():
        tracker.track_product(name, entry["urls"])
        cheapest = tracker.get_best_price(name)
        movement = tracker.price_trend(name)
        target = entry["target_price"]
        if cheapest and cheapest["price"] <= target:
            print(f"DEAL ALERT: {name}")
            print(f" Price: ${cheapest['price']:.2f} at {cheapest['retailer']}")
            print(f" Target was: ${target:.2f}")
            print(f" URL: {cheapest['url']}")
        if isinstance(movement, dict):
            print(f"\n{name} trend: {movement['change_pct']:+.1f}% "
                  f"(low: ${movement['lowest']:.2f}, high: ${movement['highest']:.2f})")
# Register the 6-hourly job, then run one check immediately so the first
# results don't have to wait for the timer.
schedule.every(6).hours.do(check_prices)
check_prices()

# Simple scheduler loop: wake once a minute and fire any pending jobs.
while True:
    schedule.run_pending()
    time.sleep(60)
Recommended Infrastructure
- ScraperAPI handles JavaScript rendering and anti-bot protections on major retailers
- ThorData residential proxies prevent IP blocks during frequent checks
- ScrapeOps monitors your scraper uptime and success rates
Conclusion
A personal shopper bot turns price comparison from a manual chore into an automated advantage. Start with a few products you're watching, let the bot build price history, and set target prices for alerts. Over time, the savings add up significantly — especially for electronics and other items with volatile pricing.
Top comments (0)