Prescription drug prices vary dramatically between pharmacies. A single medication can cost 10x more at one pharmacy versus another. Let's build a price comparison tool that scrapes pharmacy pricing data.
Why Pharmacy Price Comparison Matters
The average American spends $1,300/year on prescriptions. GoodRx has shown that prices for the same drug can vary by 80% within a single zip code. Building your own comparison tool lets you track prices over time and find the best deals.
Setting Up
pip install requests beautifulsoup4 pandas
We'll use ScraperAPI since pharmacy sites have strong anti-bot protections:
import requests
from bs4 import BeautifulSoup
import json
import re
SCRAPER_KEY = "YOUR_SCRAPERAPI_KEY"
def fetch_page(url):
    """Fetch *url* through ScraperAPI with JS rendering and return parsed HTML.

    Args:
        url: The target page URL to proxy through ScraperAPI.

    Returns:
        BeautifulSoup: the parsed document for the rendered page.

    Raises:
        requests.HTTPError: if ScraperAPI returns a 4xx/5xx status
            (bad key, quota exhausted, blocked target, ...).
        requests.Timeout: if the proxied fetch exceeds 60 seconds.
    """
    resp = requests.get(
        "http://api.scraperapi.com",
        params={
            "api_key": SCRAPER_KEY,
            "url": url,
            "render": "true",       # render JavaScript before returning HTML
            "country_code": "us",   # pharmacy prices are US-region specific
        },
        timeout=60,
    )
    # Fail loudly on proxy/auth/quota errors instead of silently parsing
    # an error page as if it were pharmacy data.
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")
Scraping GoodRx Prices
def scrape_goodrx(drug_name, zipcode):
    """Collect GoodRx price listings for *drug_name* near *zipcode*.

    Returns a list of dicts, one per pharmacy card that carries both a
    pharmacy name and a price.
    """
    soup = fetch_page(f"https://www.goodrx.com/{drug_name}?zip={zipcode}")
    results = []
    for card in soup.select("[data-qa='pharmacy-card']"):
        name_el = card.select_one("[data-qa='pharmacy-name']")
        price_el = card.select_one("[data-qa='drug-price']")
        # Cards missing either a name or a price are not usable — skip them.
        if not (name_el and price_el):
            continue
        discount_el = card.select_one("[data-qa='discount-type']")
        results.append({
            "source": "GoodRx",
            "pharmacy": name_el.text.strip(),
            "price": parse_price(price_el.text),
            "discount_type": discount_el.text.strip() if discount_el else "Retail",
            "drug": drug_name,
        })
    return results
def parse_price(text):
    """Pull the numeric dollar amount out of a string such as '$12.34'.

    Thousands separators are stripped first; returns None when no
    dollar-prefixed number is present.
    """
    cleaned = text.replace(",", "")
    found = re.search(r"\$(\d+\.?\d*)", cleaned)
    if found is None:
        return None
    return float(found.group(1))
Scraping Costco Pharmacy
def scrape_costco(drug_name):
    """Collect Costco member-pharmacy price rows for *drug_name*.

    Returns a list of dicts, one per result row that carries a price.
    """
    soup = fetch_page(
        f"https://www.costco.com/pharmacy/"
        f"drug-results-by-drug-name?storeId=&drugName={drug_name}"
    )
    results = []
    for row in soup.select(".drug-results-row"):
        price_el = row.select_one(".drug-price")
        if not price_el:
            # Rows without a price carry nothing comparable.
            continue
        form_el = row.select_one(".drug-form")
        qty_el = row.select_one(".drug-quantity")
        results.append({
            "source": "Costco",
            "pharmacy": "Costco Pharmacy",
            "price": parse_price(price_el.text),
            "form": form_el.text.strip() if form_el else "",
            "quantity": qty_el.text.strip() if qty_el else "",
            "drug": drug_name,
        })
    return results
Building the Comparison Engine
import pandas as pd
def compare_drug_prices(drug_name, zipcode="10001"):
    """Gather prices for *drug_name* from every source and rank them.

    Scraper failures are reported but do not abort the comparison.
    Returns a DataFrame sorted cheapest-first (empty if nothing was found).
    """
    collected = []
    scrapers = [
        ("GoodRx", lambda: scrape_goodrx(drug_name, zipcode)),
        ("Costco", lambda: scrape_costco(drug_name)),
    ]
    for label, run in scrapers:
        try:
            found = run()
        except Exception as e:
            # One broken source should not sink the whole comparison.
            print(f" Error with {label}: {e}")
        else:
            collected.extend(found)
            print(f" Found {len(found)} prices from {label}")
    df = pd.DataFrame(collected)
    if not df.empty:
        df = df.sort_values("price")
        savings = df["price"].max() - df["price"].min()
        print(f"\nPotential savings: ${savings:.2f}")
    return df
# Compare common medications
# Driver: run the full comparison for a few widely prescribed generics and
# print the ten cheapest offers found for each.
drugs = ["metformin", "lisinopril", "atorvastatin", "amoxicillin"]
for drug in drugs:
    print(f"\n=== {drug.upper()} ===")
    results = compare_drug_prices(drug)
    # compare_drug_prices returns an empty DataFrame when no source yielded data.
    if not results.empty:
        print(results[["pharmacy", "price", "source"]].head(10))
Tracking Price Trends
import sqlite3
from datetime import date
def save_prices(df, db="pharmacy_prices.db"):
    """Append today's price snapshot to the SQLite history table.

    Args:
        df: DataFrame of scraped prices (one row per pharmacy offer).
        db: Path to the SQLite database file.

    The caller's DataFrame is left untouched: the date stamp is added to a
    copy (the original in-place assignment mutated the caller's frame).
    """
    snapshot = df.copy()
    snapshot["date"] = date.today().isoformat()
    conn = sqlite3.connect(db)
    try:
        snapshot.to_sql("prices", conn, if_exists="append", index=False)
    finally:
        # Always release the connection, even if the write fails.
        conn.close()
def price_trend(drug, pharmacy=None, db="pharmacy_prices.db"):
    """Return the recorded price history for *drug*.

    When *pharmacy* is given, the history is narrowed to that pharmacy.
    """
    sql = "SELECT date, price, pharmacy FROM prices WHERE drug = ?"
    args = [drug]
    if pharmacy:
        sql += " AND pharmacy = ?"
        args.append(pharmacy)
    conn = sqlite3.connect(db)
    history = pd.read_sql(sql, conn, params=args)
    conn.close()
    return history
Handling Anti-Bot Measures
Pharmacy sites invest heavily in bot detection. Use ThorData residential proxies for consistent access, and ScrapeOps to monitor success rates. Key tips:
- Rotate User-Agent strings
- Add random delays between requests (2-5 seconds)
- Use residential IPs for pharmacy sites
- Cache results to minimize request frequency
Building Alerts
def check_price_drops(drug, threshold_pct=10, db="pharmacy_prices.db"):
    """Report whether *drug*'s best daily price just fell sharply.

    Compares the two most recent daily minimums; prints an alert and
    returns True when the drop exceeds *threshold_pct* percent.
    """
    query = """
SELECT date, MIN(price) as best_price
FROM prices WHERE drug = ?
GROUP BY date ORDER BY date DESC LIMIT 7
"""
    conn = sqlite3.connect(db)
    history = pd.read_sql(query, conn, params=[drug])
    conn.close()
    # Need at least two daily snapshots before a comparison makes sense.
    if len(history) < 2:
        return False
    current = history.iloc[0]["best_price"]
    previous = history.iloc[1]["best_price"]
    change_pct = ((current - previous) / previous) * 100
    if change_pct >= -threshold_pct:
        return False
    print(f"ALERT: {drug} dropped {abs(change_pct):.1f}%!")
    print(f" Was: ${previous:.2f} -> Now: ${current:.2f}")
    return True
Conclusion
Pharmacy price scraping reveals shocking price disparities that cost consumers billions annually. With Python and the right proxy setup, you can build tools that save real money on prescription medications. Start with GoodRx (richest data), add Costco (consistently low prices), then expand to more sources.
Always respect rate limits and terms of service when scraping these sites.
Top comments (0)