Zillow is the largest real estate marketplace in the US, with data on over 100 million properties. Extracting property data, Zestimates, and market trends enables powerful real estate analysis. Here's how to do it with Python in 2026.
What Data Can You Extract?
- Property addresses, prices, and Zestimates
- Square footage, bedrooms, bathrooms
- Listing status (for sale, sold, pending)
- Price history and tax records
- Neighborhood statistics
- Photos and virtual tour links
Using Zillow's Hidden API
Zillow loads data through internal API endpoints that return JSON. This is more reliable than parsing HTML:
import requests
import json
def search_zillow_api(location, page=1, map_bounds=None):
    """Search Zillow using their internal search API.

    Args:
        location: Human-readable market name (e.g. "San Francisco CA");
            used only to build the Referer header slug.
        page: 1-based results page number.
        map_bounds: Optional dict with "west"/"east"/"south"/"north"
            coordinates limiting the search area. Defaults to a San
            Francisco bounding box (the original hard-coded behavior).

    Returns:
        Parsed JSON response dict on HTTP 200, otherwise None.
    """
    url = "https://www.zillow.com/search/GetSearchPageState.htm"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
        "Accept": "application/json",
        "Referer": f"https://www.zillow.com/{location.lower().replace(' ', '-')}/",
    }
    if map_bounds is None:
        # Backward-compatible default: San Francisco bounding box.
        map_bounds = {
            "west": -122.5,
            "east": -122.3,
            "south": 37.7,
            "north": 37.85,
        }
    # Search parameters mirroring the browser's searchQueryState payload.
    search_query = {
        "pagination": {"currentPage": page},
        "mapBounds": map_bounds,
        "filterState": {
            "isForSaleByAgent": {"value": True},
            "isForSaleByOwner": {"value": True},
            "isNewConstruction": {"value": False},
            "isForSaleForeclosure": {"value": False},
            "isComingSoon": {"value": False},
            "isAuction": {"value": False},
        },
    }
    params = {
        "searchQueryState": json.dumps(search_query),
        "wants": json.dumps({"cat1": ["listResults", "mapResults"]}),
        "requestId": 1,
    }
    try:
        # Fix: the original call had no timeout and could hang indefinitely.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: request failed ({exc})")
        return None
    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    return None
Parsing Search Results
def parse_search_results(api_response):
    """Extract property listings from a Zillow search API response.

    Args:
        api_response: Parsed JSON dict from ``search_zillow_api`` (may
            be None on a failed request).

    Returns:
        List of flat per-listing dicts; empty when the response is
        missing or contains no results.
    """
    if not api_response:
        return []

    listings = (
        api_response.get("cat1", {})
        .get("searchResults", {})
        .get("listResults", [])
    )

    parsed = []
    for item in listings:
        coords = item.get("latLong", {})
        record = {
            "zpid": item.get("zpid"),
            "address": item.get("address"),
            "price": item.get("price"),
            "unformatted_price": item.get("unformattedPrice"),
            "beds": item.get("beds"),
            "baths": item.get("baths"),
            "area_sqft": item.get("area"),
            "latitude": coords.get("latitude"),
            "longitude": coords.get("longitude"),
            "status": item.get("statusText"),
            "listing_url": item.get("detailUrl"),
            "broker": item.get("brokerName"),
            "zestimate": item.get("zestimate"),
            "price_per_sqft": None,
        }

        # Derive $/sqft when both numerator and denominator are usable.
        price = record["unformatted_price"]
        sqft = record["area_sqft"]
        if price and sqft:
            try:
                record["price_per_sqft"] = round(price / sqft, 2)
            except (TypeError, ZeroDivisionError):
                pass

        parsed.append(record)
    return parsed
Scraping Individual Property Pages
from playwright.sync_api import sync_playwright
import re
def scrape_property_details(zpid):
    """Scrape detailed property information from a Zillow listing page.

    Args:
        zpid: Zillow property ID used to build the /homedetails/ URL.

    Returns:
        Dict with "raw_data" (embedded Next.js payload, when found),
        "price", "beds"/"baths"/"sqft" (when present), "zestimate",
        and "price_history".
    """
    url = f"https://www.zillow.com/homedetails/{zpid}_zpid/"
    details = {}

    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
        )
        page = context.new_page()
        page.goto(url, wait_until="networkidle")

        # Preferred source: the embedded Next.js JSON payload.
        for script in page.query_selector_all('script[type="application/json"]'):
            try:
                blob = json.loads(script.inner_text())
            except json.JSONDecodeError:
                continue
            if isinstance(blob, dict) and "props" in blob:
                page_props = blob["props"].get("pageProps", {})
                if "componentProps" in page_props:
                    details["raw_data"] = page_props["componentProps"]
                    break

        # Fallback: read values off the visible page elements.
        def _text(selector):
            el = page.query_selector(selector)
            return el.inner_text() if el else None

        details['price'] = _text('span[data-testid="price"]')

        # Property facts (bed / bath / sqft badges).
        for fact in page.query_selector_all('span[data-testid="bed-bath-beyond"]'):
            label = fact.inner_text()
            lowered = label.lower()
            if "bed" in lowered:
                details['beds'] = label
            elif "bath" in lowered:
                details['baths'] = label
            elif "sqft" in lowered:
                details['sqft'] = label

        details['zestimate'] = _text('div[data-testid="zestimate-text"]')
        details['price_history'] = extract_price_history(page)

        browser.close()

    return details
def extract_price_history(page):
    """Extract price history rows from a Zillow listing page.

    Args:
        page: Browser page already navigated to a listing.

    Returns:
        List of dicts with "date", "event", and "price" keys, one per
        table row that has at least three cells.
    """
    # Expand the collapsed history section when the toggle is present.
    toggle = page.query_selector('button:has-text("See complete price history")')
    if toggle:
        toggle.click()
        import time
        time.sleep(2)  # crude wait for the expanded rows to render

    entries = []
    rows = page.query_selector_all('table[class*="price-history"] tr')
    for row in rows[1:]:  # first row is the header
        cells = row.query_selector_all('td')
        if len(cells) < 3:
            continue
        entries.append({
            "date": cells[0].inner_text(),
            "event": cells[1].inner_text(),
            "price": cells[2].inner_text(),
        })
    return entries
Market Analysis
import statistics
def analyze_market(properties):
    """Analyze a set of properties for market insights.

    Args:
        properties: List of dicts shaped like parse_search_results output
            (must include "unformatted_price" and "price_per_sqft" keys).

    Returns:
        Dict with overall price statistics, price-per-sqft statistics,
        and median prices grouped by bedroom count.
    """
    prices = [item["unformatted_price"] for item in properties if item["unformatted_price"]]
    per_sqft = [item["price_per_sqft"] for item in properties if item["price_per_sqft"]]

    has_prices = bool(prices)
    price_stats = {
        "median": statistics.median(prices) if has_prices else None,
        "mean": round(statistics.mean(prices)) if has_prices else None,
        "min": min(prices) if has_prices else None,
        "max": max(prices) if has_prices else None,
        # stdev needs at least two samples.
        "stdev": round(statistics.stdev(prices)) if len(prices) > 1 else None,
    }
    sqft_stats = {
        "median": round(statistics.median(per_sqft)) if per_sqft else None,
        "mean": round(statistics.mean(per_sqft)) if per_sqft else None,
    }

    # Bucket sale prices by bedroom count for per-segment medians.
    from collections import defaultdict
    buckets = defaultdict(list)
    for item in properties:
        bed_count = item.get("beds")
        price = item.get("unformatted_price")
        if bed_count and price:
            buckets[bed_count].append(price)

    by_bedrooms = {
        f"{bed_count}_bed": {
            "count": len(bucket),
            "median_price": statistics.median(bucket),
        }
        for bed_count, bucket in sorted(buckets.items())
    }

    return {
        "total_listings": len(properties),
        "price_stats": price_stats,
        "price_per_sqft_stats": sqft_stats,
        "by_bedrooms": by_bedrooms,
    }
Data Export
import csv
def export_properties(properties, filename="zillow_data.csv"):
    """Write property dicts to a CSV file.

    Args:
        properties: List of dicts with identical keys (the first row's
            keys define the CSV header). No-op when empty.
        filename: Destination CSV path.
    """
    if not properties:
        return
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=properties[0].keys())
        writer.writeheader()
        writer.writerows(properties)
    # Fix: the message previously printed a literal placeholder instead
    # of the actual destination filename.
    print(f"Exported {len(properties)} properties to {filename}")
def export_analysis(analysis, filename="market_analysis.json"):
    """Write the market analysis dict to a JSON file.

    Args:
        analysis: JSON-serializable analysis dict.
        filename: Destination JSON path.
    """
    # encoding added for portability across platforms/locales.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(analysis, f, indent=2)
    # Fix: the message previously printed a literal placeholder instead
    # of the actual destination filename.
    print(f"Analysis saved to {filename}")
Running the Pipeline
if __name__ == "__main__":
    # Search for properties in the demo market (SF bounds are the
    # search function's default).
    print("Searching Zillow...")
    api_data = search_zillow_api("San Francisco CA")
    properties = parse_search_results(api_data)
    print(f"Found {len(properties)} properties")

    # Export raw listing data.
    export_properties(properties)

    # Market analysis.
    analysis = analyze_market(properties)
    export_analysis(analysis)

    # Fix: when the request was blocked or returned no listings, the
    # stats are None and the original format specs (":,.0f") raised a
    # TypeError — guard each line individually.
    median_price = analysis["price_stats"]["median"]
    median_sqft = analysis["price_per_sqft_stats"]["median"]
    print("\nMarket Summary:")
    if median_price is not None:
        print(f"  Median Price: ${median_price:,.0f}")
    if median_sqft is not None:
        print(f"  Median $/sqft: ${median_sqft:,}")
    print(f"  Listings: {analysis['total_listings']}")
Proxy Recommendations
Zillow has aggressive anti-scraping measures. For reliable data collection, use ScrapeOps which provides proxy rotation and request management optimized for real estate sites.
Legal Disclaimer
Zillow's Terms of Service prohibit automated data collection. This guide is for educational purposes. For production use, consider Zillow's official API or licensed data partnerships.
Conclusion
Zillow real estate scraping enables powerful property analysis and market intelligence. Start with the hidden API endpoints for search results, then use browser automation for individual property details. Always use proper proxy rotation and respect rate limits.
Top comments (0)