Zillow is the largest real estate marketplace in the US, with data on over 100 million properties. Extracting property data, Zestimates, and market trends enables powerful real estate analysis. Here's how to do it with Python in 2026.
What Data Can You Extract?
- Property addresses, prices, and Zestimates
- Square footage, bedrooms, bathrooms
- Listing status (for sale, sold, pending)
- Price history and tax records
- Neighborhood statistics
- Photos and virtual tour links
Using Zillow's Hidden API
Zillow loads data through internal API endpoints that return JSON. This is more reliable than parsing HTML:
# Implementation is proprietary (that IS the moat).
# Skip the build — use our ready-made Apify actor:
# see the CTA below for the link (fpr=yw6md3).
Parsing Search Results
def parse_search_results(api_response):
    """Extract property listings from a Zillow search API response.

    Args:
        api_response: Decoded JSON mapping. Listings are read from
            ``cat1 -> searchResults -> listResults``. ``None`` or an
            empty mapping yields an empty result.

    Returns:
        A list with one flat dict per listing. Fields missing from a
        listing are ``None``. ``price_per_sqft`` is the unformatted price
        divided by the area, rounded to 2 decimals, or ``None`` when
        either value is missing, zero, or non-numeric.
    """
    if not api_response:
        return []

    # `or {}` instead of a .get() default: a default only applies when the
    # key is absent, not when it is present with a null value.
    category = api_response.get("cat1") or {}
    search = category.get("searchResults") or {}
    listings = search.get("listResults") or []

    results = []
    for listing in listings:
        coords = listing.get("latLong") or {}
        price = listing.get("unformattedPrice")
        area = listing.get("area")

        price_per_sqft = None
        if price and area:
            try:
                price_per_sqft = round(price / area, 2)
            except (TypeError, ZeroDivisionError):
                # Best effort: a non-numeric price/area leaves the ratio
                # unset instead of failing the whole parse.
                pass

        # Key order is kept stable; the first row's keys may be used as
        # column headers by downstream exporters.
        results.append({
            "zpid": listing.get("zpid"),
            "address": listing.get("address"),
            "price": listing.get("price"),
            "unformatted_price": price,
            "beds": listing.get("beds"),
            "baths": listing.get("baths"),
            "area_sqft": area,
            "latitude": coords.get("latitude"),
            "longitude": coords.get("longitude"),
            "status": listing.get("statusText"),
            "listing_url": listing.get("detailUrl"),
            "broker": listing.get("brokerName"),
            "zestimate": listing.get("zestimate"),
            "price_per_sqft": price_per_sqft,
        })
    return results
Scraping Individual Property Pages
# Implementation is proprietary (that IS the moat).
# Skip the build — use our ready-made Apify actor:
# see the CTA below for the link (fpr=yw6md3).
Market Analysis
import statistics
def analyze_market(properties):
    """Summarize a set of parsed property records.

    Args:
        properties: Sequence of dicts. The keys ``unformatted_price``,
            ``price_per_sqft`` and ``beds`` are read; records where a key
            is missing or its value is falsy are left out of the
            corresponding statistic.

    Returns:
        A dict with ``total_listings``, ``price_stats`` (median, mean, min,
        max, stdev), ``price_per_sqft_stats`` (median, mean) and
        ``by_bedrooms`` (count and median price per bedroom count, keyed
        as ``"<beds>_bed"``). A statistic is ``None`` when there is not
        enough data to compute it.
    """
    from collections import defaultdict

    # .get() keeps this consistent with the grouping loop below, so a
    # record that lacks a key is skipped instead of raising KeyError.
    prices = [
        p["unformatted_price"] for p in properties if p.get("unformatted_price")
    ]
    sqft_prices = [
        p["price_per_sqft"] for p in properties if p.get("price_per_sqft")
    ]

    analysis = {
        "total_listings": len(properties),
        "price_stats": {
            "median": statistics.median(prices) if prices else None,
            "mean": round(statistics.mean(prices)) if prices else None,
            "min": min(prices) if prices else None,
            "max": max(prices) if prices else None,
            # Sample standard deviation needs at least two data points.
            "stdev": round(statistics.stdev(prices)) if len(prices) > 1 else None,
        },
        "price_per_sqft_stats": {
            "median": round(statistics.median(sqft_prices)) if sqft_prices else None,
            "mean": round(statistics.mean(sqft_prices)) if sqft_prices else None,
        },
        "by_bedrooms": {},
    }

    # Group prices by bedroom count.
    by_beds = defaultdict(list)
    for p in properties:
        beds = p.get("beds")
        price = p.get("unformatted_price")
        # Only a missing bedroom count is skipped; 0 is a legitimate value
        # and must not be dropped by a truthiness test.
        if beds is not None and price:
            by_beds[beds].append(price)

    for beds in sorted(by_beds):
        bed_prices = by_beds[beds]
        analysis["by_bedrooms"][f"{beds}_bed"] = {
            "count": len(bed_prices),
            "median_price": statistics.median(bed_prices),
        }
    return analysis
Data Export
import csv
def export_properties(properties, filename="zillow_data.csv"):
    """Write property records to a CSV file.

    The header row is taken from the keys of the first record. When
    ``properties`` is empty, nothing is written and nothing is printed.

    Args:
        properties: Sequence of dicts, one per property.
        filename: Destination path of the CSV file.
    """
    if properties:
        columns = list(properties[0])
        with open(filename, "w", newline="", encoding="utf-8") as csv_file:
            out = csv.DictWriter(csv_file, fieldnames=columns)
            out.writeheader()
            for record in properties:
                out.writerow(record)
        print(f"Exported {len(properties)} properties to {filename}")
def export_analysis(analysis, filename="market_analysis.json"):
    """Save a market analysis dict to a JSON file.

    Args:
        analysis: JSON-serializable mapping to write.
        filename: Destination path of the JSON file.
    """
    # Imported here because the surrounding code never imports `json`;
    # without it the call below fails with NameError.
    import json

    # Explicit encoding, matching the CSV export, so the output does not
    # depend on the platform default.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(analysis, f, indent=2)
    print(f"Analysis saved to {filename}")
Running the Pipeline
if __name__ == "__main__":
    # NOTE(review): `time` is not used anywhere in this block; kept in case
    # code outside this view relies on it.
    import time

    # Search for properties.
    print("Searching Zillow...")
    # NOTE(review): search_zillow_api is not defined in the visible source;
    # it must be provided elsewhere for this script to run.
    api_data = search_zillow_api("San Francisco CA")
    properties = parse_search_results(api_data)
    print(f"Found {len(properties)} properties")

    # Export raw data.
    export_properties(properties)

    # Market analysis.
    analysis = analyze_market(properties)
    export_analysis(analysis)

    # Either median is None when no listing carried usable data (for
    # example an empty search result). Formatting None with a numeric
    # format spec raises TypeError, so fall back to a placeholder.
    median_price = analysis["price_stats"]["median"]
    median_sqft = analysis["price_per_sqft_stats"]["median"]
    price_text = f"${median_price:,.0f}" if median_price is not None else "n/a"
    sqft_text = f"${median_sqft:,}" if median_sqft is not None else "n/a"

    print("\nMarket Summary:")
    print(f" Median Price: {price_text}")
    print(f" Median $/sqft: {sqft_text}")
    print(f" Listings: {analysis['total_listings']}")
Proxy Recommendations
Zillow has aggressive anti-scraping measures. For reliable data collection, use ScrapeOps, which provides proxy rotation and request management optimized for real estate sites.
Legal Disclaimer
Zillow's Terms of Service prohibit automated data collection. This guide is for educational purposes. For production use, consider Zillow's official API or licensed data partnerships.
Conclusion
Zillow real estate scraping enables powerful property analysis and market intelligence. Start with the hidden API endpoints for search results, then use browser automation for individual property details. Always use proper proxy rotation and respect rate limits.
Top comments (0)