DEV Community

agenthustler
agenthustler

Posted on

Scraping Insurance Quote Comparators at Scale with Python

Insurance comparison sites aggregate quotes from dozens of providers. Scraping this data reveals pricing patterns, regional differences, and competitive dynamics in the insurance market.

The Insurance Data Opportunity

Insurance comparison sites like Policygenius, The Zebra, and NerdWallet publish rate ranges, provider rankings, and coverage details. This data is valuable for market research and competitive analysis.

Setting Up the Scraper

import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

# ScraperAPI key — the gateway used for fetching and JS-rendering every page below.
API_KEY = "YOUR_SCRAPERAPI_KEY"  # Get one at https://www.scraperapi.com?fp_ref=the52

def scrape_insurance_comparator(insurance_type, state):
    """Scrape provider quote cards from The Zebra for one insurance type/state.

    Args:
        insurance_type: URL slug such as 'auto-insurance'.
        state: URL slug such as 'california'.

    Returns:
        A list of dicts with keys: insurance_type, state, company, rate,
        scraped_at. Empty list when no cards are found.

    Raises:
        requests.HTTPError: if the proxy API responds with a non-2xx status.
    """
    target_url = f"https://www.thezebra.com/{insurance_type}/{state}/"
    # Pass the target URL via params= so requests percent-encodes it;
    # interpolating it raw into the query string corrupts any '&' or '?'.
    response = requests.get(
        "http://api.scraperapi.com/",
        params={"api_key": API_KEY, "url": target_url, "render": "true"},
        timeout=60,
    )
    response.raise_for_status()  # fail loudly instead of parsing an error page
    soup = BeautifulSoup(response.text, 'html.parser')

    quotes = []
    # Selectors are alternatives because the site A/B tests card markup.
    for card in soup.select('.company-card, .rate-card'):
        company = card.select_one('.company-name, h3')
        rate = card.select_one('.rate, .price')
        if company:
            quotes.append({
                'insurance_type': insurance_type,
                'state': state,
                'company': company.text.strip(),
                'rate': rate.text.strip() if rate else 'N/A',
                'scraped_at': datetime.now().isoformat()
            })
    return quotes

# State and product slugs to sweep; extend these lists to widen coverage.
states = ['california', 'texas', 'florida', 'new-york', 'illinois']
types = ['auto-insurance', 'home-insurance', 'life-insurance']

all_quotes = []
for ins_type in types:
    for state in states:
        try:
            quotes = scrape_insurance_comparator(ins_type, state)
        except requests.RequestException as exc:
            # One failed state/type pair must not abort the whole sweep.
            print(f"{ins_type}/{state}: request failed ({exc})")
            continue
        all_quotes.extend(quotes)
        print(f"{ins_type}/{state}: {len(quotes)} providers")

df = pd.DataFrame(all_quotes)
df.to_csv('insurance_quotes.csv', index=False)
Enter fullscreen mode Exit fullscreen mode

Parsing and Normalizing Rates

import re

# Compiled once: a dollar sign followed by an integer with optional decimals.
# Commas are stripped before matching, so no comma alternative is needed.
_RATE_RE = re.compile(r'\$(\d+(?:\.\d+)?)')

def parse_monthly_rate(rate_str):
    """Extract the first dollar amount from a rate string as a float.

    Args:
        rate_str: Raw scraped text such as '$1,234.56/mo' or 'N/A'.

    Returns:
        The numeric amount, or None when the value is missing or has no
        dollar figure (e.g. 'N/A', or NaN after a CSV round-trip).
    """
    if not isinstance(rate_str, str):
        return None  # tolerate NaN/None coming out of pandas
    match = _RATE_RE.search(rate_str.replace(',', ''))
    return float(match.group(1)) if match else None

# Derive a numeric premium column, then summarize the spread of rates
# within each (insurance type, state) segment.
df['monthly_rate'] = df['rate'].map(parse_monthly_rate)
summary = (
    df.groupby(['insurance_type', 'state'])['monthly_rate']
      .agg(['mean', 'min', 'max'])
)
print(summary.round(2).to_string())
Enter fullscreen mode Exit fullscreen mode

Cross-Site Comparison

Use a rendering proxy service to avoid detection (the examples here route through ScraperAPI):

def scrape_policygenius(insurance_type):
    """Scrape the list of provider names from a Policygenius category page.

    Args:
        insurance_type: URL slug such as 'auto-insurance'.

    Returns:
        A list of provider name strings (possibly empty).

    Raises:
        requests.HTTPError: if the proxy API responds with a non-2xx status.
    """
    target_url = f"https://www.policygenius.com/{insurance_type}/"
    # params= percent-encodes the target URL; raw interpolation would
    # break if the URL ever contained '&' or '?'.
    response = requests.get(
        "http://api.scraperapi.com/",
        params={"api_key": API_KEY, "url": target_url, "render": "true"},
        timeout=60,
    )
    response.raise_for_status()  # fail loudly instead of parsing an error page
    soup = BeautifulSoup(response.text, 'html.parser')

    providers = []
    for item in soup.select('.provider-listing'):
        name = item.select_one('.provider-name')
        if name:
            providers.append(name.text.strip())
    return providers
Enter fullscreen mode Exit fullscreen mode

Visualizing Regional Differences

import matplotlib.pyplot as plt

# Compare mean auto premiums across states, one bar group per company.
auto_rates = df.loc[df['insurance_type'] == 'auto-insurance']
by_state = auto_rates.pivot_table(
    values='monthly_rate', index='state', columns='company', aggfunc='mean'
)
by_state.plot(kind='bar', figsize=(14, 6), title='Auto Insurance Rates by State')
plt.ylabel('Monthly Rate ($)')
plt.tight_layout()
plt.savefig('insurance_rates.png', dpi=150)
Enter fullscreen mode Exit fullscreen mode

Key Findings

  • Auto insurance rates vary up to 300% between states for identical coverage
  • Seasonal patterns exist — rates increase before winter in northern states
  • Insurtechs consistently price 10-20% below incumbents initially
  • Track with ScrapeOps and use ScraperAPI for JS rendering

Top comments (0)