Building a Real Estate Foreclosure Tracker with Public Records
Foreclosure data is public information in the United States, but it's scattered across county clerk websites, court systems, and government databases. Investors, researchers, and journalists who can aggregate this data gain a significant informational advantage. Let's build a Python-based foreclosure tracker.
Why Track Foreclosures?
Foreclosure filings are leading indicators of economic stress in specific markets. They help real estate investors find below-market properties, help journalists investigate predatory lending, and help researchers study housing market dynamics.
Data Sources
Foreclosure data comes from several public sources:
- County clerk/recorder websites — Notice of Default, Lis Pendens filings
- HUD foreclosure listings — government-owned properties
- Court records — judicial foreclosure proceedings
- Census/ACS data — demographic context
HUD Foreclosure Listings Scraper
import re
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
SCRAPER_API_KEY = "YOUR_KEY"
class HUDForeclosureScraper:
    """Scrapes HUD Home Store foreclosure listings through the ScraperAPI proxy."""

    BASE_URL = "https://www.hudhomestore.gov"

    def search_properties(self, state, city=None, zip_code=None):
        """Search available HUD-owned properties.

        Args:
            state: Two-letter state code for the HUD search form.
            city: Optional city filter.
            zip_code: Optional ZIP-code filter.

        Returns:
            List of dicts with "address", "price", and "status" keys.

        Raises:
            requests.HTTPError: if the proxy request fails.
        """
        search_url = f"{self.BASE_URL}/Listing/PropertySearchResult"
        form_data = {
            "State": state,
            "City": city or "",
            "Zip": zip_code or "",
            "PropertyType": "ALL",
            "ListingStatus": "Available"
        }
        # render=true asks ScraperAPI to execute JavaScript before returning HTML.
        response = requests.post(
            "http://api.scraperapi.com",
            params={"api_key": SCRAPER_API_KEY, "url": search_url, "render": "true"},
            data=form_data,
            timeout=60
        )
        # Fail loudly on proxy/quota/upstream errors instead of silently
        # parsing an error page as if it were a listings page.
        response.raise_for_status()
        return self._parse_listings(response.text)

    def _parse_listings(self, html):
        """Extract property records from listings HTML.

        The selector is deliberately broad to tolerate layout variants, which
        means the same row can match twice (once as a styled listing and once
        as a bare <tr>), so results are de-duplicated by address text.
        """
        soup = BeautifulSoup(html, "html.parser")
        properties = []
        seen_addresses = set()
        for listing in soup.select(".property-listing, .listing-row, tr"):
            address = listing.select_one(".address, td:nth-child(1)")
            if not address:
                continue
            addr_text = address.get_text(strip=True)
            # Skip blank cells and rows already captured via another selector.
            if not addr_text or addr_text in seen_addresses:
                continue
            seen_addresses.add(addr_text)
            price = listing.select_one(".price, td:nth-child(3)")
            status = listing.select_one(".status, td:nth-child(4)")
            properties.append({
                "address": addr_text,
                "price": price.get_text(strip=True) if price else "N/A",
                "status": status.get_text(strip=True) if status else "N/A"
            })
        return properties
County Records Scraper
class CountyRecordsScraper:
    """Pulls foreclosure-related filings from county clerk/recorder sites."""

    # Document types that indicate a foreclosure proceeding (lower-case).
    # Hoisted to a class constant so the list isn't rebuilt for every row.
    FORECLOSURE_TYPES = (
        "notice of default",
        "lis pendens",
        "notice of sale",
        "foreclosure",
    )

    def __init__(self, scraper_api_key):
        # ScraperAPI key used to proxy requests through rendered browsers.
        self.api_key = scraper_api_key

    def scrape_county_records(self, county_url, search_params):
        """Scrape a county records page and keep only foreclosure filings.

        Args:
            county_url: URL of the county record-search results page.
            search_params: Dict of query parameters appended to the URL
                (previously this argument was accepted but silently ignored).

        Returns:
            List of filing dicts with case_number, filing_date,
            document_type, and property_address keys.

        Raises:
            requests.HTTPError: if the proxy request fails.
        """
        if search_params:
            # Append search filters to the target URL; respect an existing query string.
            separator = "&" if "?" in county_url else "?"
            county_url = county_url + separator + urlencode(search_params)
        response = requests.get(
            "http://api.scraperapi.com",
            params={"api_key": self.api_key, "url": county_url, "render": "true"},
            timeout=60
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        filings = []
        for table in soup.find_all("table"):
            rows = table.find_all("tr")
            # rows[0] is assumed to be the header row.
            for row in rows[1:]:
                cells = row.find_all("td")
                if len(cells) < 4:
                    continue
                filing = {
                    "case_number": cells[0].get_text(strip=True),
                    "filing_date": cells[1].get_text(strip=True),
                    "document_type": cells[2].get_text(strip=True),
                    "property_address": cells[3].get_text(strip=True)
                }
                doc_type = filing["document_type"].lower()
                if any(ft in doc_type for ft in self.FORECLOSURE_TYPES):
                    filings.append(filing)
        return filings
Geocoding and Market Analysis
import time
def geocode_address(address):
    """Geocode a free-form address string with OSM Nominatim.

    Args:
        address: Free-form street address.

    Returns:
        Dict with "lat" (float), "lon" (float), and "display_name" keys,
        or None when Nominatim finds no match.

    Raises:
        requests.HTTPError: if Nominatim returns an error status.
    """
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params={"q": address, "format": "json", "limit": 1},
        # Nominatim's usage policy requires an identifying User-Agent.
        headers={"User-Agent": "ForeclosureTracker/1.0"},
        # Previously missing: without a timeout, a hung connection would
        # block the whole pipeline indefinitely.
        timeout=30,
    )
    # Nominatim's usage policy: at most one request per second.
    time.sleep(1)
    # Surface HTTP errors explicitly rather than failing inside .json().
    response.raise_for_status()
    results = response.json()
    if not results:
        return None
    top = results[0]
    return {
        "lat": float(top["lat"]),
        "lon": float(top["lon"]),
        "display_name": top["display_name"],
    }
def market_concentration(properties, top_n=20):
    """Rank ZIP codes by foreclosure count to surface geographic hot spots.

    Args:
        properties: Iterable of record dicts; each may carry an "address" string.
        top_n: Maximum number of ZIP codes to return (default 20, matching
            the previous hard-coded limit).

    Returns:
        List of {"zip": str, "count": int} dicts, most concentrated first.
    """
    from collections import Counter
    zips = []
    for prop in properties:
        # Take the LAST 5-digit group: US addresses end with the ZIP, and a
        # 5-digit house number ("12345 Main St, ... 62704") must not be
        # miscounted as a ZIP code (the old first-match logic did exactly that).
        matches = re.findall(r'\b(\d{5})\b', prop.get("address", ""))
        if matches:
            zips.append(matches[-1])
    return [{"zip": z, "count": c} for z, c in Counter(zips).most_common(top_n)]
Building the Pipeline
import pandas as pd
from datetime import datetime
class ForeclosureTracker:
    """Orchestrates the HUD and county scrapers into a dated CSV snapshot."""

    def __init__(self, api_key):
        # api_key is the ScraperAPI key used by the county scraper.
        self.hud = HUDForeclosureScraper()
        self.county = CountyRecordsScraper(api_key)

    def daily_scan(self, states, county_urls):
        """Scrape every configured source, tag provenance, and persist a CSV.

        One failing state or county no longer aborts the whole scan (the old
        behavior lost every record already collected); failures are reported
        and the scan continues with the remaining sources.

        Args:
            states: Iterable of two-letter state codes for the HUD search.
            county_urls: Iterable of county record-search URLs.

        Returns:
            pandas.DataFrame of all records found (may be empty), also
            written to foreclosures_YYYYMMDD.csv in the working directory.
        """
        all_records = []
        for state in states:
            try:
                properties = self.hud.search_properties(state)
            except Exception as exc:
                print(f"HUD scan failed for {state}: {exc}")
                continue
            for prop in properties:
                prop["source"] = "HUD"
                prop["state"] = state
            all_records.extend(properties)
        for url in county_urls:
            try:
                filings = self.county.scrape_county_records(url, {})
            except Exception as exc:
                print(f"County scan failed for {url}: {exc}")
                continue
            for filing in filings:
                filing["source"] = "County"
            all_records.extend(filings)
        df = pd.DataFrame(all_records)
        df["scan_date"] = datetime.now().isoformat()
        filename = f"foreclosures_{datetime.now():%Y%m%d}.csv"
        df.to_csv(filename, index=False)
        print(f"Found {len(all_records)} properties across {len(states)} states")
        return df
Scaling Across Counties
With over 3,000 counties in the US, scaling requires solid proxy infrastructure. ScraperAPI handles JavaScript rendering for modern county sites. ThorData residential proxies prevent IP blocks during large scans. ScrapeOps tracks success rates per county.
Legal Considerations
All data scraped here is public record. However, respect rate limits, comply with each site's terms of service, and avoid overloading government infrastructure. This tool is designed for legitimate research, journalism, and investment analysis.
Track foreclosures systematically and you'll see market shifts before they appear in the headlines.
Top comments (0)