Prescription drug prices vary dramatically between pharmacies. A single medication can cost 10x more at one pharmacy versus another. Let's build a price comparison tool that scrapes pharmacy pricing data.
Why Pharmacy Price Comparison Matters
The average American spends $1,300/year on prescriptions. GoodRx has shown that prices for the same drug can vary by 80% within a single zip code. Building your own comparison tool lets you track prices over time and find the best deals.
Setting Up
pip install requests beautifulsoup4 pandas
We'll use ScraperAPI since pharmacy sites have strong anti-bot protections:
import requests
from bs4 import BeautifulSoup
import json
import re
SCRAPER_KEY = "YOUR_SCRAPERAPI_KEY"
def fetch_page(url):
    """Fetch *url* through ScraperAPI with JS rendering and return parsed HTML.

    Args:
        url: The target page URL to proxy through ScraperAPI.

    Returns:
        BeautifulSoup: the parsed document for the rendered page.

    Raises:
        requests.HTTPError: if ScraperAPI returns a 4xx/5xx status
            (bad key, quota exhausted, blocked target, ...).
        requests.Timeout: if the proxied fetch exceeds 60 seconds.
    """
    resp = requests.get(
        "http://api.scraperapi.com",
        params={
            "api_key": SCRAPER_KEY,
            "url": url,
            "render": "true",       # render JavaScript before returning HTML
            "country_code": "us",   # pharmacy prices are US-region specific
        },
        timeout=60,
    )
    # Fail loudly on proxy/auth/quota errors instead of silently parsing
    # an error page as if it were pharmacy data.
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")
Scraping GoodRx Prices
def scrape_goodrx(drug_name, zipcode):
    """Collect GoodRx price listings for *drug_name* near *zipcode*.

    Returns a list of dicts, one per pharmacy card that carries both a
    pharmacy name and a price.
    """
    soup = fetch_page(f"https://www.goodrx.com/{drug_name}?zip={zipcode}")
    results = []
    for card in soup.select("[data-qa='pharmacy-card']"):
        name_el = card.select_one("[data-qa='pharmacy-name']")
        price_el = card.select_one("[data-qa='drug-price']")
        # Cards missing either a name or a price are not usable — skip them.
        if not (name_el and price_el):
            continue
        discount_el = card.select_one("[data-qa='discount-type']")
        results.append({
            "source": "GoodRx",
            "pharmacy": name_el.text.strip(),
            "price": parse_price(price_el.text),
            "discount_type": discount_el.text.strip() if discount_el else "Retail",
            "drug": drug_name,
        })
    return results
def parse_price(text):
    """Pull the numeric dollar amount out of a string such as '$12.34'.

    Thousands separators are stripped first; returns None when no
    dollar-prefixed number is present.
    """
    cleaned = text.replace(",", "")
    found = re.search(r"\$(\d+\.?\d*)", cleaned)
    if found is None:
        return None
    return float(found.group(1))
Scraping Costco Pharmacy
def scrape_costco(drug_name):
    """Collect Costco member-pharmacy price rows for *drug_name*.

    Returns a list of dicts, one per result row that carries a price.
    """
    soup = fetch_page(
        f"https://www.costco.com/pharmacy/"
        f"drug-results-by-drug-name?storeId=&drugName={drug_name}"
    )
    results = []
    for row in soup.select(".drug-results-row"):
        price_el = row.select_one(".drug-price")
        if not price_el:
            # Rows without a price carry nothing comparable.
            continue
        form_el = row.select_one(".drug-form")
        qty_el = row.select_one(".drug-quantity")
        results.append({
            "source": "Costco",
            "pharmacy": "Costco Pharmacy",
            "price": parse_price(price_el.text),
            "form": form_el.text.strip() if form_el else "",
            "quantity": qty_el.text.strip() if qty_el else "",
            "drug": drug_name,
        })
    return results
Building the Comparison Engine
import pandas as pd
def compare_drug_prices(drug_name, zipcode="10001"):
    """Gather prices for *drug_name* from every source and rank them.

    Scraper failures are reported but do not abort the comparison.
    Returns a DataFrame sorted cheapest-first (empty if nothing was found).
    """
    collected = []
    scrapers = [
        ("GoodRx", lambda: scrape_goodrx(drug_name, zipcode)),
        ("Costco", lambda: scrape_costco(drug_name)),
    ]
    for label, run in scrapers:
        try:
            found = run()
        except Exception as e:
            # One broken source should not sink the whole comparison.
            print(f" Error with {label}: {e}")
        else:
            collected.extend(found)
            print(f" Found {len(found)} prices from {label}")
    df = pd.DataFrame(collected)
    if not df.empty:
        df = df.sort_values("price")
        savings = df["price"].max() - df["price"].min()
        print(f"\nPotential savings: ${savings:.2f}")
    return df
# Compare common medications
# Driver: run the full comparison for a few widely prescribed generics and
# print the ten cheapest offers found for each.
drugs = ["metformin", "lisinopril", "atorvastatin", "amoxicillin"]
for drug in drugs:
    print(f"\n=== {drug.upper()} ===")
    results = compare_drug_prices(drug)
    # compare_drug_prices returns an empty DataFrame when no source yielded data.
    if not results.empty:
        print(results[["pharmacy", "price", "source"]].head(10))
Tracking Price Trends
import sqlite3
from datetime import date
def save_prices(df, db="pharmacy_prices.db"):
    """Append today's price snapshot to the SQLite history table.

    Args:
        df: DataFrame of scraped prices (one row per pharmacy offer).
        db: Path to the SQLite database file.

    The caller's DataFrame is left untouched: the date stamp is added to a
    copy (the original in-place assignment mutated the caller's frame).
    """
    snapshot = df.copy()
    snapshot["date"] = date.today().isoformat()
    conn = sqlite3.connect(db)
    try:
        snapshot.to_sql("prices", conn, if_exists="append", index=False)
    finally:
        # Always release the connection, even if the write fails.
        conn.close()
def price_trend(drug, pharmacy=None, db="pharmacy_prices.db"):
    """Return the recorded price history for *drug*.

    When *pharmacy* is given, the history is narrowed to that pharmacy.
    """
    sql = "SELECT date, price, pharmacy FROM prices WHERE drug = ?"
    args = [drug]
    if pharmacy:
        sql += " AND pharmacy = ?"
        args.append(pharmacy)
    conn = sqlite3.connect(db)
    history = pd.read_sql(sql, conn, params=args)
    conn.close()
    return history
Handling Anti-Bot Measures
Pharmacy sites invest heavily in bot detection. Use ThorData residential proxies for consistent access, and ScrapeOps to monitor success rates. Key tips:
- Rotate User-Agent strings
- Add random delays between requests (2-5 seconds)
- Use residential IPs for pharmacy sites
- Cache results to minimize request frequency
Building Alerts
def check_price_drops(drug, threshold_pct=10, db="pharmacy_prices.db"):
    """Report whether *drug*'s best daily price just fell sharply.

    Compares the two most recent daily minimums; prints an alert and
    returns True when the drop exceeds *threshold_pct* percent.
    """
    query = """
SELECT date, MIN(price) as best_price
FROM prices WHERE drug = ?
GROUP BY date ORDER BY date DESC LIMIT 7
"""
    conn = sqlite3.connect(db)
    history = pd.read_sql(query, conn, params=[drug])
    conn.close()
    # Need at least two daily snapshots before a comparison makes sense.
    if len(history) < 2:
        return False
    current = history.iloc[0]["best_price"]
    previous = history.iloc[1]["best_price"]
    change_pct = ((current - previous) / previous) * 100
    if change_pct >= -threshold_pct:
        return False
    print(f"ALERT: {drug} dropped {abs(change_pct):.1f}%!")
    print(f" Was: ${previous:.2f} -> Now: ${current:.2f}")
    return True
Conclusion
Pharmacy price scraping reveals shocking price disparities that cost consumers billions annually. With Python and the right proxy setup, you can build tools that save real money on prescription medications. Start with GoodRx (richest data), add Costco (consistently low prices), then expand to more sources.
Always respect rate limits and terms of service when scraping these sites.
Top comments (0)