DEV Community

agenthustler
agenthustler

Posted on

Zillow Real Estate Scraping: Property Data, Prices, and Estimates with Python

Zillow is the largest real estate marketplace in the US, with data on over 100 million properties. Extracting property data, Zestimates, and market trends enables powerful real estate analysis. Here's how to do it with Python in 2026.

What Data Can You Extract?

  • Property addresses, prices, and Zestimates
  • Square footage, bedrooms, bathrooms
  • Listing status (for sale, sold, pending)
  • Price history and tax records
  • Neighborhood statistics
  • Photos and virtual tour links

Using Zillow's Hidden API

Zillow loads data through internal API endpoints that return JSON. This is more reliable than parsing HTML:

import requests
import json

def search_zillow_api(location, page=1, map_bounds=None):
    """Search Zillow using their internal search API.

    Args:
        location: Human-readable location, e.g. "San Francisco CA".
            Only used to build a plausible Referer header.
        page: 1-based results page number.
        map_bounds: Optional dict with "west"/"east"/"south"/"north"
            keys bounding the search area. Defaults to the original
            hard-coded San Francisco bounds for backward compatibility.

    Returns:
        Parsed JSON dict on HTTP 200, otherwise None (an error is
        printed to stdout).
    """
    url = "https://www.zillow.com/search/GetSearchPageState.htm"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
        "Accept": "application/json",
        "Referer": f"https://www.zillow.com/{location.lower().replace(' ', '-')}/",
    }

    # Previously hard-coded to San Francisco regardless of `location`;
    # now overridable via the new keyword argument.
    if map_bounds is None:
        map_bounds = {
            "west": -122.5,
            "east": -122.3,
            "south": 37.7,
            "north": 37.85,
        }

    # Search parameters mirroring what the Zillow web UI sends.
    search_query = {
        "pagination": {"currentPage": page},
        "mapBounds": map_bounds,
        "filterState": {
            "isForSaleByAgent": {"value": True},
            "isForSaleByOwner": {"value": True},
            "isNewConstruction": {"value": False},
            "isForSaleForeclosure": {"value": False},
            "isComingSoon": {"value": False},
            "isAuction": {"value": False},
        },
    }

    params = {
        "searchQueryState": json.dumps(search_query),
        "wants": json.dumps({"cat1": ["listResults", "mapResults"]}),
        "requestId": 1,
    }

    try:
        # Bug fix: requests.get without a timeout can hang indefinitely
        # on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=15)
    except requests.RequestException as exc:
        print(f"Error: request failed ({exc})")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error: {response.status_code}")
    return None
Enter fullscreen mode Exit fullscreen mode

Parsing Search Results

def parse_search_results(api_response):
    """Turn a raw Zillow search API response into a list of flat dicts.

    Args:
        api_response: Decoded JSON from the search endpoint, or None.

    Returns:
        A list of property dicts (possibly empty); each includes a
        computed "price_per_sqft" when price and area are available.
    """
    if not api_response:
        return []

    listings = (
        api_response.get("cat1", {})
                    .get("searchResults", {})
                    .get("listResults", [])
    )

    parsed = []
    for item in listings:
        coords = item.get("latLong", {})
        record = {
            "zpid": item.get("zpid"),
            "address": item.get("address"),
            "price": item.get("price"),
            "unformatted_price": item.get("unformattedPrice"),
            "beds": item.get("beds"),
            "baths": item.get("baths"),
            "area_sqft": item.get("area"),
            "latitude": coords.get("latitude"),
            "longitude": coords.get("longitude"),
            "status": item.get("statusText"),
            "listing_url": item.get("detailUrl"),
            "broker": item.get("brokerName"),
            "zestimate": item.get("zestimate"),
            "price_per_sqft": None,
        }

        # Derive $/sqft only when both numerator and denominator exist;
        # tolerate bad data (non-numeric or zero area).
        price, sqft = record["unformatted_price"], record["area_sqft"]
        if price and sqft:
            try:
                record["price_per_sqft"] = round(price / sqft, 2)
            except (TypeError, ZeroDivisionError):
                pass

        parsed.append(record)

    return parsed
Enter fullscreen mode Exit fullscreen mode

Scraping Individual Property Pages

from playwright.sync_api import sync_playwright
import re

def scrape_property_details(zpid):
    """Scrape detailed property information from Zillow.

    Launches headless Chromium via Playwright, loads the property
    detail page for *zpid*, and pulls data first from the embedded
    Next.js JSON payload, then from visible page elements as a
    fallback.

    Args:
        zpid: Zillow property ID, interpolated into the detail-page URL.

    Returns:
        dict that may contain: "raw_data", "price", "beds", "baths",
        "sqft", "zestimate", "price_history". Values are None (or the
        key is absent) when the corresponding element is not found.
    """
    # NOTE(review): this snippet uses `json` but only imports playwright
    # and re — confirm a module-level `import json` is in scope.
    url = f"https://www.zillow.com/homedetails/{zpid}_zpid/"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
        )
        page = context.new_page()

        # Wait until the network goes idle so client-rendered data exists.
        page.goto(url, wait_until="networkidle")

        details = {}

        # Extract from Next.js data: the page embeds its props as JSON
        # inside <script type="application/json"> tags; take the first
        # one exposing props.pageProps.componentProps.
        scripts = page.query_selector_all('script[type="application/json"]')
        for script in scripts:
            try:
                data = json.loads(script.inner_text())
                if isinstance(data, dict) and "props" in data:
                    page_props = data["props"].get("pageProps", {})
                    if "componentProps" in page_props:
                        details["raw_data"] = page_props["componentProps"]
                        break
            except json.JSONDecodeError:
                continue

        # Fallback: extract from visible elements
        price_el = page.query_selector('span[data-testid="price"]')
        details['price'] = price_el.inner_text() if price_el else None

        # Property facts — classify each fact span by keyword match.
        facts = page.query_selector_all('span[data-testid="bed-bath-beyond"]')
        for fact in facts:
            text = fact.inner_text()
            if "bed" in text.lower():
                details['beds'] = text
            elif "bath" in text.lower():
                details['baths'] = text
            elif "sqft" in text.lower():
                details['sqft'] = text

        # Zestimate (Zillow's automated value estimate), if displayed.
        zestimate_el = page.query_selector('div[data-testid="zestimate-text"]')
        details['zestimate'] = zestimate_el.inner_text() if zestimate_el else None

        # Price history (delegates to the sibling helper).
        details['price_history'] = extract_price_history(page)

        browser.close()
        return details

def extract_price_history(page):
    """Extract price history rows from a Zillow listing page.

    Args:
        page: Playwright Page already navigated to a property detail page.

    Returns:
        List of {"date", "event", "price"} dicts in page order; empty
        when no price-history table is found.
    """
    # Idiom fix: import hoisted from mid-function to the top of the
    # function (kept local so the snippet stays self-contained).
    import time

    history = []

    # Expand the collapsed history section if the toggle is present.
    expand_btn = page.query_selector('button:has-text("See complete price history")')
    if expand_btn:
        expand_btn.click()
        time.sleep(2)  # crude wait for the expanded rows to render

    rows = page.query_selector_all('table[class*="price-history"] tr')
    for row in rows[1:]:  # Skip header
        cells = row.query_selector_all('td')
        if len(cells) >= 3:
            history.append({
                "date": cells[0].inner_text(),
                "event": cells[1].inner_text(),
                "price": cells[2].inner_text(),
            })

    return history
Enter fullscreen mode Exit fullscreen mode

Market Analysis

import statistics

def analyze_market(properties):
    """Analyze a set of properties for market insights.

    Args:
        properties: List of dicts as produced by parse_search_results.
            Reads the "unformatted_price", "price_per_sqft", and "beds"
            keys; falsy/missing values are excluded from statistics.

    Returns:
        dict with overall price stats, price-per-sqft stats, and a
        per-bedroom-count breakdown. Individual stats are None when
        there is not enough data (e.g. stdev needs >= 2 prices).
    """
    # Idiom fix: import hoisted from the middle of the function body.
    from collections import defaultdict

    prices = [p["unformatted_price"] for p in properties if p["unformatted_price"]]
    sqft_prices = [p["price_per_sqft"] for p in properties if p["price_per_sqft"]]

    analysis = {
        "total_listings": len(properties),
        "price_stats": {
            "median": statistics.median(prices) if prices else None,
            "mean": round(statistics.mean(prices)) if prices else None,
            "min": min(prices) if prices else None,
            "max": max(prices) if prices else None,
            "stdev": round(statistics.stdev(prices)) if len(prices) > 1 else None,
        },
        "price_per_sqft_stats": {
            "median": round(statistics.median(sqft_prices)) if sqft_prices else None,
            "mean": round(statistics.mean(sqft_prices)) if sqft_prices else None,
        },
        "by_bedrooms": {},
    }

    # Group listing prices by bedroom count; listings without a bed
    # count or price are skipped.
    by_beds = defaultdict(list)
    for p in properties:
        beds = p.get("beds")
        if beds and p.get("unformatted_price"):
            by_beds[beds].append(p["unformatted_price"])

    for beds, bed_prices in sorted(by_beds.items()):
        analysis["by_bedrooms"][f"{beds}_bed"] = {
            "count": len(bed_prices),
            "median_price": statistics.median(bed_prices),
        }

    return analysis
Enter fullscreen mode Exit fullscreen mode

Data Export

import csv

def export_properties(properties, filename="zillow_data.csv"):
    """Write a list of property dicts to a CSV file.

    The keys of the first dict become the CSV header, so all dicts are
    expected to share the same keys. Returns early (writing nothing)
    when the list is empty.

    Args:
        properties: List of uniform property dicts.
        filename: Output CSV path.
    """
    if not properties:
        return

    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=properties[0].keys())
        writer.writeheader()
        writer.writerows(properties)

    # Bug fix: the original printed a garbled placeholder instead of
    # the actual output filename.
    print(f"Exported {len(properties)} properties to {filename}")

def export_analysis(analysis, filename="market_analysis.json"):
    """Serialize the market analysis dict to a JSON file.

    Args:
        analysis: JSON-serializable dict (as built by analyze_market).
        filename: Output JSON path.
    """
    # Robustness: explicit encoding instead of the platform default.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(analysis, f, indent=2)
    # Bug fix: the original printed a garbled placeholder instead of
    # the actual output filename.
    print(f"Analysis saved to {filename}")
Enter fullscreen mode Exit fullscreen mode

Running the Pipeline

if __name__ == "__main__":
    import time

    # Search for properties
    print("Searching Zillow...")
    api_data = search_zillow_api("San Francisco CA")
    properties = parse_search_results(api_data)
    print(f"Found {len(properties)} properties")

    # Export raw data
    export_properties(properties)

    # Market analysis
    analysis = analyze_market(properties)
    export_analysis(analysis)

    print(f"\nMarket Summary:")
    print(f"  Median Price: ${analysis['price_stats']['median']:,.0f}")
    print(f"  Median $/sqft: ${analysis['price_per_sqft_stats']['median']:,}")
    print(f"  Listings: {analysis['total_listings']}")
Enter fullscreen mode Exit fullscreen mode

Proxy Recommendations

Zillow has aggressive anti-scraping measures. For reliable data collection, use ScrapeOps which provides proxy rotation and request management optimized for real estate sites.

Legal Disclaimer

Zillow's Terms of Service prohibit automated data collection. This guide is for educational purposes. For production use, consider Zillow's official API or licensed data partnerships.

Conclusion

Zillow real estate scraping enables powerful property analysis and market intelligence. Start with the hidden API endpoints for search results, then use browser automation for individual property details. Always use proper proxy rotation and respect rate limits.

Top comments (0)