Real estate data is worth billions — but most of it is siloed behind platforms charging $500-5,000/month for API access. Zillow, Redfin, Realtor.com, and Trulia all have the same listing data. Here's how to extract it.
What's available
Real estate platforms expose:
- Property listings: price, beds/baths, square footage, lot size
- Property details: year built, HOA fees, tax history, price history
- Photos and virtual tours
- Agent/broker contact info
- Days on market, listing status
- Neighborhood data: schools, walkability, commute times
- Recent sold comps
Method 1: Zillow (most data, aggressive anti-bot)
Zillow has the most comprehensive US property data but blocks scrapers aggressively:
from playwright.async_api import async_playwright
import asyncio
import json
async def scrape_zillow_listing(url: str) -> dict:
    """Load a Zillow listing page in headless Chromium and return its embedded JSON.

    Args:
        url: Full URL of the Zillow listing page to open.

    Returns:
        A dict with two keys:
        ``"structured"`` -- the first JSON-LD object on the page whose ``@type``
        is ``SingleFamilyResidence`` or ``Residence``, or ``None`` if none matches;
        ``"raw"`` -- ``props.pageProps.gdpClientCache`` from the ``__NEXT_DATA__``
        script element, or ``None`` if it is missing or cannot be parsed.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--disable-blink-features=AutomationControlled"],
        )
        # Close the browser even when navigation or evaluation raises, so a
        # failed scrape does not leave a Chromium process behind.
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/122.0.0.0 Safari/537.36",
                viewport={"width": 1440, "height": 900},
            )
            page = await context.new_page()

            # Abort image and font requests: only the embedded JSON is read,
            # so these assets would just add load time and bandwidth.
            await page.route(
                "**/*.{png,jpg,jpeg,gif,svg,woff,woff2}",
                lambda r: r.abort(),
            )
            await page.goto(url, wait_until="domcontentloaded")

            # Zillow embeds structured data in JSON-LD script tags; take the
            # first one describing a residence and ignore unparsable tags.
            data = await page.evaluate("""
                () => {
                    const scripts = document.querySelectorAll('script[type="application/ld+json"]');
                    for (const s of scripts) {
                        try {
                            const d = JSON.parse(s.innerText);
                            if (d['@type'] === 'SingleFamilyResidence' || d['@type'] === 'Residence') {
                                return d;
                            }
                        } catch(e) {}
                    }
                    return null;
                }
            """)

            # Also extract the listing cache from the __NEXT_DATA__ state.
            next_data = await page.evaluate("""
                () => {
                    const el = document.getElementById('__NEXT_DATA__');
                    if (el) {
                        try {
                            const d = JSON.parse(el.innerText);
                            return d.props?.pageProps?.gdpClientCache || null;
                        } catch(e) {}
                    }
                    return null;
                }
            """)
            return {"structured": data, "raw": next_data}
        finally:
            await browser.close()
# Test on a listing: asyncio.run() drives the coroutine to completion from
# synchronous code. This is a module-level statement, so it executes as soon
# as the file is run or imported.
# NOTE(review): the URL below looks like a placeholder (zpid 12345678) — swap in a real listing.
listing = asyncio.run(scrape_zillow_listing(
"https://www.zillow.com/homedetails/123-main-st-austin-tx-78701/12345678_zpid/"
))
Method 2: Redfin (more scraper-friendly)
Redfin has an unofficial API that returns JSON:
import requests
def search_redfin_listings(city: str, state: str = "CA") -> list:
    """Query Redfin's location autocomplete for a city (listing lookup is a stub).

    Only the first step -- the autocomplete request -- is performed; parsing
    the region ID and fetching listings is intentionally left unimplemented
    because the endpoint changes frequently.

    Args:
        city: City name to search for.
        state: State abbreviation appended to the query (default ``"CA"``).

    Returns:
        Always an empty list for now, so callers can iterate the result
        safely until the listing lookup is implemented.
    """
    # First, get region ID
    search_url = "https://www.redfin.com/stingray/do/location-autocomplete"
    params = {"location": f"{city}, {state}", "start": 0, "count": 10, "v": 2}
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/122.0.0.0"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(search_url, params=params, headers=headers, timeout=30)
    # TODO: parse the region from `response`, then use the region ID to get listings.
    # This endpoint changes frequently — use the Apify actor for reliability
    return []
def get_redfin_listing_details(listing_url: str) -> dict:
    """Extract listing data from Redfin page HTML.

    Args:
        listing_url: URL of the Redfin listing page to fetch.

    Returns:
        A dict with ``"price"``, ``"beds"``, ``"baths"`` and ``"sqft"`` holding
        the text of the matching page elements (``None`` for any element that
        is not found), or an empty dict when the response status is not 200.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/122.0.0.0"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(listing_url, headers=headers, timeout=30)
    if response.status_code != 200:
        return {}

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(response.text, "html.parser")
    # Key stats are located via Redfin's data-rf-test-id attributes.
    price = soup.select_one("[data-rf-test-id='abp-price']")
    beds = soup.select_one("[data-rf-test-id='abp-beds']")
    baths = soup.select_one("[data-rf-test-id='abp-baths']")
    sqft = soup.select_one("[data-rf-test-id='abp-sqFt']")
    return {
        "price": price.text if price else None,
        "beds": beds.text if beds else None,
        "baths": baths.text if baths else None,
        "sqft": sqft.text if sqft else None,
    }
Method 3: Realtor.com (structured data available)
Realtor.com includes JSON-LD structured data on listing pages:
import requests
from bs4 import BeautifulSoup
import json
def scrape_realtor_listing(url: str) -> dict:
    """Fetch a Realtor.com listing page and summarize its JSON-LD data.

    Args:
        url: URL of the listing page to fetch.

    Returns:
        A dict with ``name``, ``price``, ``address``, ``city``, ``state``,
        ``beds`` and ``description`` (truncated to 300 characters) taken from
        the first JSON-LD object typed ``SingleFamilyResidence`` or
        ``RealEstateListing``. Returns an empty dict when the response status
        is not 200 or no such object is found.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Chrome/122.0.0.0"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.get(url, headers=headers, timeout=30)
    if r.status_code != 200:
        return {}

    soup = BeautifulSoup(r.text, "html.parser")
    # JSON-LD structured data
    for script in soup.find_all("script", type="application/ld+json"):
        raw = script.string
        if not raw:
            # An empty tag has no string; json.loads(None) would raise TypeError.
            continue
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            continue
        # A JSON-LD script may hold a single object or an array of objects.
        candidates = data if isinstance(data, list) else [data]
        for item in candidates:
            if isinstance(item, dict) and _is_realtor_listing(item):
                return _realtor_listing_fields(item)
    return {}


def _is_realtor_listing(item: dict) -> bool:
    """Return True when the object's ``@type`` names a supported listing type."""
    types = item.get("@type")
    if not isinstance(types, list):
        types = [types]
    return any(t in ("SingleFamilyResidence", "RealEstateListing") for t in types)


def _first_dict(value) -> dict:
    """Coerce a nested JSON-LD value to a dict: itself, its first dict element, or ``{}``."""
    if isinstance(value, list):
        value = next((v for v in value if isinstance(v, dict)), None)
    return value if isinstance(value, dict) else {}


def _realtor_listing_fields(item: dict) -> dict:
    """Build the flat summary dict from one listing JSON-LD object."""
    offers = _first_dict(item.get("offers"))
    address = _first_dict(item.get("address"))
    return {
        "name": item.get("name"),
        "price": offers.get("price"),
        "address": address.get("streetAddress"),
        "city": address.get("addressLocality"),
        "state": address.get("addressRegion"),
        "beds": item.get("numberOfRooms"),
        # `or ""` guards against an explicit null description in the JSON.
        "description": (item.get("description") or "")[:300],
    }
Method 4: Pre-built real estate tracker
The Real Estate Tracker on Apify handles Zillow, Redfin, and Realtor.com, with proxy rotation and anti-bot handling built in.
Input: city/neighborhood, property type, price range filters
Output: standardized property listings with price history
48+ production runs. Pay-per-result pricing.
Building a price tracker
Track price changes on properties you're monitoring:
import sqlite3
from datetime import datetime
def init_db(db_path: str = "real_estate.db"):
    """Open the tracker database and make sure the ``listings`` table exists.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``"real_estate.db"``; pass ``":memory:"`` for a throwaway
            in-memory database.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for closing it.
    """
    conn = sqlite3.connect(db_path)
    # IF NOT EXISTS makes repeated calls against the same file harmless.
    conn.execute("""
        CREATE TABLE IF NOT EXISTS listings (
            zpid TEXT,
            address TEXT,
            price REAL,
            beds INTEGER,
            baths REAL,
            sqft INTEGER,
            status TEXT,
            scraped_at TIMESTAMP
        )
    """)
    conn.commit()
    return conn
def track_listing(conn, listing_data: dict):
    """Append one snapshot of a listing to the ``listings`` table.

    A new row is inserted on every call (nothing is updated in place), so the
    table accumulates a history per property.

    Args:
        conn: Open ``sqlite3`` connection to a database containing ``listings``.
        listing_data: Mapping that may contain ``zpid``, ``address``, ``price``,
            ``beds``, ``baths``, ``sqft`` and ``status``; missing keys are
            stored as NULL.
    """
    # Name the columns explicitly so the insert does not depend on the table's
    # physical column order or break if a column is added later.
    # `with conn` commits on success and rolls back if the insert raises.
    with conn:
        conn.execute(
            "INSERT INTO listings "
            "(zpid, address, price, beds, baths, sqft, status, scraped_at) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (
                listing_data.get("zpid"),
                listing_data.get("address"),
                listing_data.get("price"),
                listing_data.get("beds"),
                listing_data.get("baths"),
                listing_data.get("sqft"),
                listing_data.get("status"),
                # Local wall-clock time as an ISO-8601 string.
                datetime.now().isoformat(),
            ),
        )
def get_price_history(conn, zpid: str) -> list:
    """Return all recorded snapshots for one property, ordered by scrape time.

    Args:
        conn: Open ``sqlite3`` connection to a database containing ``listings``.
        zpid: Identifier of the property whose rows should be returned.

    Returns:
        A list of ``(price, status, scraped_at)`` tuples sorted by
        ``scraped_at`` ascending; empty when the property has no rows.
    """
    query = "SELECT price, status, scraped_at FROM listings WHERE zpid = ? ORDER BY scraped_at"
    return conn.execute(query, (zpid,)).fetchall()
Use cases
- Investment analysis: Monitor properties matching your criteria for price drops
- Market research: Track median prices and days-on-market by neighborhood
- Lead generation: Agent contact info from listings in specific price ranges
- Price prediction: Build ML models on historical price change data
- Rental arbitrage: Compare purchase price vs estimated rental income
n8n AI Automation Pack ($39) — 5 production-ready workflows
Skip the setup
Apify Scrapers Bundle — $29 one-time
Includes the Real Estate Tracker and 34+ other production scrapers. Instant download.
Top comments (0)