Redfin and Realtor.com are two of the largest US real estate platforms. Extracting their listing data enables investment analysis, cross-platform price comparison, and market tracking.
Redfin API
import json
import statistics

import requests
class RedfinScraper:
    """Small client for Redfin's ``stingray/api/gis`` listing-search endpoint.

    A single ``requests.Session`` with a browser-like User-Agent is reused
    for every request made through the instance.
    """

    def __init__(self, timeout=30):
        """Create the HTTP session.

        Args:
            timeout: Seconds to wait for the server before giving up on a
                request. Without a timeout ``requests`` may block forever.
        """
        self.s = requests.Session()
        self.s.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
        self.timeout = timeout

    @staticmethod
    def _value(home, key, default):
        """Return ``home[key]["value"]``, or *default* if any part is missing or null."""
        field = home.get(key) or {}
        value = field.get("value")
        return default if value is None else value

    def search(self, region_id, n=350, region_type=6):
        """Fetch listings for a region and return them as flat dicts.

        Args:
            region_id: Redfin region identifier sent as ``region_id``.
            n: Maximum number of homes to request (sent as ``num_homes``).
            region_type: Redfin region type code sent as ``region_type``.
                The default 6 is the value this scraper has always sent
                (presumably "city" -- confirm against Redfin before relying
                on other codes).

        Returns:
            A list of dicts with the keys ``address``, ``price``, ``beds``,
            ``baths`` and ``sqft``. Missing or null fields fall back to
            ``""`` for the address and ``0`` for the numeric fields.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the response body is not valid JSON.
        """
        r = self.s.get(
            "https://www.redfin.com/stingray/api/gis",
            # Apart from num_homes/region_id/region_type these are fixed
            # query values carried over unchanged.
            params={
                "al": 1,
                "num_homes": n,
                "region_id": region_id,
                "region_type": region_type,
                "sf": "1,2,3,5,6,7",
                "status": 9,
                "v": 8,
            },
            timeout=self.timeout,
        )
        # Fail loudly on a blocked/error response instead of trying to
        # parse an HTML error page as JSON.
        r.raise_for_status()
        # The body may start with a "{}&&" guard prefix that is not JSON.
        txt = r.text[4:] if r.text.startswith("{}&&") else r.text
        # "payload"/"homes" can be present but null; treat that as empty.
        payload = json.loads(txt).get("payload") or {}
        return [
            {
                "address": self._value(h, "streetLine", ""),
                "price": self._value(h, "price", 0),
                "beds": h.get("beds", 0),
                "baths": h.get("baths", 0),
                "sqft": self._value(h, "sqFt", 0),
            }
            for h in payload.get("homes") or []
        ]
Realtor.com GraphQL
class RealtorScraper:
    """Small client for the Realtor.com ``rdc_search_srp`` GraphQL endpoint.

    A single ``requests.Session`` carrying JSON request headers is reused
    for every request made through the instance.
    """

    def __init__(self, timeout=30):
        """Create the HTTP session.

        Args:
            timeout: Seconds to wait for the server before giving up on a
                request. Without a timeout ``requests`` may block forever.
        """
        self.s = requests.Session()
        self.s.headers.update({"User-Agent":"Mozilla/5.0","Content-Type":"application/json"})
        self.timeout = timeout

    def search(self, city, state, limit=50):
        """Search for-sale listings in a city and return them as flat dicts.

        Args:
            city: City name sent as the ``city`` search field.
            state: State code sent as the ``state_code`` search field.
            limit: Maximum number of results to request.

        Returns:
            A list of dicts with the keys ``price``, ``beds``, ``baths``,
            ``sqft`` and ``address``. A value is ``None`` when the response
            does not provide it. An empty list is returned when the response
            carries no ``data``/``home_search``/``results``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the response body is not valid JSON.
        """
        q = {"query":'''query Q($q:SearchInput!,$l:Int,$sort:[SortInput]){
home_search(query:$q,limit:$l,sort:$sort){total results{
property_id list_price description{beds baths sqft}
location{address{line city state_code}}
}}}''',
            "variables":{"q":{"city":city,"state_code":state,"status":["for_sale"]},
                         "l":limit,"sort":[{"field":"list_date","direction":"desc"}]}}
        r = self.s.post(
            "https://www.realtor.com/api/v1/rdc_search_srp",
            json=q,
            timeout=self.timeout,
        )
        # Fail loudly on a blocked/error response instead of trying to
        # parse an HTML error page as JSON.
        r.raise_for_status()
        # Any level of the response may be present but null (for example
        # "data": null alongside an "errors" list), so every nested lookup
        # falls back to an empty container.
        data = r.json().get("data") or {}
        results = (data.get("home_search") or {}).get("results") or []
        homes = []
        for x in results:
            desc = x.get("description") or {}
            addr = (x.get("location") or {}).get("address") or {}
            homes.append({
                "price": x.get("list_price"),
                "beds": desc.get("beds"),
                # The query already fetches baths; expose it so the result
                # has the same fields as RedfinScraper.search.
                "baths": desc.get("baths"),
                "sqft": desc.get("sqft"),
                "address": addr.get("line"),
            })
        return homes
Price Comparison
def compare(city, state, rid):
    """Print a price summary of the same market on Redfin and Realtor.com.

    For each platform this prints the number of listings returned and the
    median list price, followed by the average price per square foot.
    A line is skipped when no listing provides the data it needs.

    Args:
        city: City name passed to ``RealtorScraper.search``.
        state: State code passed to ``RealtorScraper.search``.
        rid: Region id passed to ``RedfinScraper.search``.
    """
    rf = RedfinScraper().search(rid)
    rt = RealtorScraper().search(city, state)
    for name, homes in [("Redfin", rf), ("Realtor", rt)]:
        # Listings with a missing or zero price/sqft are left out so they
        # neither skew the statistics nor cause a division by zero.
        prices = [h["price"] for h in homes if h.get("price")]
        ppsf = [h["price"] / h["sqft"] for h in homes if h.get("sqft") and h.get("price")]
        if prices:
            # statistics.median averages the two middle values for an
            # even-sized sample; indexing sorted(prices)[len // 2] would
            # report the upper-middle price instead.
            print(f"{name}: {len(homes)} listings, median ${statistics.median(prices):,.0f}")
        if ppsf:
            print(f" avg $/sqft: ${sum(ppsf)/len(ppsf):,.0f}")
# Example run: compare Austin, TX listings, using 30818 as the Redfin region id.
compare(city="Austin", state="TX", rid=30818)
Anti-Bot
ScraperAPI handles JS rendering and CAPTCHAs. ThorData residential proxies mimic real traffic. ScrapeOps tracks endpoint success.
Dashboard Ideas
Useful dashboard views include price discrepancies between platforms, listing freshness, $/sqft by neighborhood, and days-on-market trends.
Top comments (0)