DEV Community

agenthustler
agenthustler

Posted on

How to Scrape Redfin and Realtor.com Property Listings with Python

Redfin and Realtor.com are two of the largest US real estate platforms. Their listing data is useful for investment analysis, price comparison, and market tracking.

Redfin API

import requests, json

class RedfinScraper:
    """Client for Redfin's ``stingray/api/gis`` search endpoint.

    Keeps a single ``requests.Session`` with a browser-like User-Agent and
    flattens the endpoint's JSON payload into plain listing dictionaries.
    """

    # Seconds to wait for the server. requests applies no timeout by default,
    # so without this a stalled connection would block forever. Override on
    # the class or on an instance to change it.
    timeout = 30

    # The response body is prefixed with this guard before the JSON text.
    _JSON_PREFIX = "{}&&"

    def __init__(self):
        self.s = requests.Session()
        self.s.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"

    @staticmethod
    def _value(home, key, default):
        """Return ``home[key]["value"]``, or *default* if the field is absent or null."""
        # ``home.get(key, {})`` is not enough: a key that is present with a
        # null value would yield None and break the chained ``.get``.
        return (home.get(key) or {}).get("value", default)

    def search(self, region_id, n=350):
        """Return up to *n* listings for a region.

        Args:
            region_id: Region identifier sent as the ``region_id`` parameter.
            n: Maximum number of homes to request (``num_homes``).

        Returns:
            A list of dicts with the keys ``address``, ``price``, ``beds``,
            ``baths`` and ``sqft``. Fields missing from a listing fall back
            to ``""`` (address) or ``0`` (everything else).

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the response body is not valid JSON.
        """
        r = self.s.get(
            "https://www.redfin.com/stingray/api/gis",
            params={"al": 1, "num_homes": n, "region_id": region_id, "region_type": 6,
                    "sf": "1,2,3,5,6,7", "status": 9, "v": 8},
            timeout=self.timeout,
        )
        # Fail loudly on a block page / error status instead of feeding
        # non-JSON content to the parser below.
        r.raise_for_status()

        txt = r.text
        if txt.startswith(self._JSON_PREFIX):
            txt = txt[len(self._JSON_PREFIX):]

        homes = (json.loads(txt).get("payload") or {}).get("homes") or []
        return [{
            "address": self._value(h, "streetLine", ""),
            "price": self._value(h, "price", 0),
            "beds": h.get("beds", 0), "baths": h.get("baths", 0),
            "sqft": self._value(h, "sqFt", 0),
        } for h in homes]
Enter fullscreen mode Exit fullscreen mode

Realtor.com GraphQL

class RealtorScraper:
    """Client for the Realtor.com ``rdc_search_srp`` GraphQL endpoint.

    Keeps a single ``requests.Session`` with JSON headers and flattens the
    ``home_search`` results into plain listing dictionaries.
    """

    # Seconds to wait for the server. requests applies no timeout by default,
    # so without this a stalled connection would block forever. Override on
    # the class or on an instance to change it.
    timeout = 30

    def __init__(self):
        self.s = requests.Session()
        self.s.headers.update({"User-Agent":"Mozilla/5.0","Content-Type":"application/json"})

    def search(self, city, state, limit=50):
        """Return for-sale listings in a city, sorted by list date descending.

        Args:
            city: City name sent in the search input.
            state: State code sent as ``state_code``.
            limit: Maximum number of results requested.

        Returns:
            A list of dicts with the keys ``price``, ``beds``, ``baths``,
            ``sqft`` and ``address``. A field the server omits or returns as
            null is reported as ``None``. An empty list is returned when the
            response carries no ``home_search`` results.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the response body is not valid JSON.
        """
        q = {"query":'''query Q($q:SearchInput!,$l:Int,$sort:[SortInput]){
            home_search(query:$q,limit:$l,sort:$sort){total results{
                property_id list_price description{beds baths sqft}
                location{address{line city state_code}}
            }}}''',
            "variables":{"q":{"city":city,"state_code":state,"status":["for_sale"]},
                         "l":limit,"sort":[{"field":"list_date","direction":"desc"}]}}
        r = self.s.post("https://www.realtor.com/api/v1/rdc_search_srp", json=q,
                        timeout=self.timeout)
        # Fail loudly on a block page / error status instead of parsing it.
        r.raise_for_status()

        # Any level of the response may be an explicit null (e.g. "data" on
        # a GraphQL error), so every step falls back to an empty container.
        body = r.json()
        results = ((body.get("data") or {}).get("home_search") or {}).get("results") or []

        listings = []
        for x in results:
            desc = x.get("description") or {}
            addr = (x.get("location") or {}).get("address") or {}
            listings.append({
                "price": x.get("list_price"),
                "beds": desc.get("beds"),
                # baths is requested in the query; expose it alongside beds.
                "baths": desc.get("baths"),
                "sqft": desc.get("sqft"),
                "address": addr.get("line"),
            })
        return listings
Enter fullscreen mode Exit fullscreen mode

Price Comparison

def compare(city, state, rid):
    """Print a price summary for the same market on Redfin and Realtor.com.

    For each platform this prints the number of listings with the median
    price, and the average price per square foot. A line is skipped when no
    listing has the data needed to compute it.

    Args:
        city: City name passed to the Realtor.com search.
        state: State code passed to the Realtor.com search.
        rid: Region id passed to the Redfin search.
    """
    # Function-scope import so this block needs no change to the file header.
    from statistics import median

    rf, rt = RedfinScraper().search(rid), RealtorScraper().search(city, state)
    for name, homes in [("Redfin", rf), ("Realtor", rt)]:
        # Listings with a missing or zero price are left out of both stats.
        prices = [h["price"] for h in homes if h.get("price")]
        ppsf = [h["price"] / h["sqft"] for h in homes if h.get("sqft") and h.get("price")]
        if prices:
            # statistics.median averages the two middle values for an
            # even-sized list; indexing sorted(prices)[len//2] would report
            # the upper-middle price instead.
            print(f"{name}: {len(homes)} listings, median ${median(prices):,.0f}")
        if ppsf:
            print(f"  avg $/sqft: ${sum(ppsf)/len(ppsf):,.0f}")

# Example run: Austin, TX on Realtor.com, with 30818 passed as the Redfin region id.
compare(city="Austin", state="TX", rid=30818)
Enter fullscreen mode Exit fullscreen mode

Anti-Bot

Both sites use anti-bot protection, so plain `requests` calls may be blocked. ScraperAPI handles JS rendering and CAPTCHAs, ThorData residential proxies mimic real traffic, and ScrapeOps tracks endpoint success rates.

Dashboard Ideas

Ideas to build on this data: price discrepancies between platforms, listing freshness, $/sqft by neighborhood, and days-on-market trends.

Top comments (0)