Rental yield — annual rent divided by the property's purchase price, usually quoted as a percentage — is the fundamental metric for real estate investing. Scraping this data across markets lets you compare investment opportunities globally.
Why Scrape Rental Yields?
Commercial real estate data costs thousands per year. Public property listings contain the asking rents and sale prices needed to estimate gross yields. A well-built scraper gives you institutional-grade data for free.
Setup
pip install requests beautifulsoup4 pandas numpy
Scraping Property Listings
Here's a framework for collecting rental and sale prices from property portals:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
def scrape_property_listings(city, listing_type="rent"):
    """Fetch one listings page through ScraperAPI and parse its property cards.

    Args:
        city: City slug interpolated into the target URL path.
        listing_type: Second URL path segment; the article uses "rent" and "sale".

    Returns:
        A list of dicts with the keys ``city``, ``type``, ``price``,
        ``bedrooms``, ``area_sqm`` and ``neighborhood``. Fields missing from a
        card fall back to ``0`` (numbers) or ``""`` (neighborhood).

    Raises:
        requests.HTTPError: If the proxy answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeout.
    """
    params = {
        "api_key": "YOUR_SCRAPERAPI_KEY",
        "url": f"https://example-property-site.com/{city}/{listing_type}",
        "render": "true",
    }
    # Without a timeout a stalled proxy connection blocks forever.
    # NOTE(review): 70 s follows ScraperAPI's guidance for rendered pages —
    # confirm against the current docs for your plan.
    response = requests.get(
        "https://api.scraperapi.com", params=params, timeout=70
    )
    # An error page would otherwise parse "successfully" into zero listings.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    listings = []
    for card in soup.select(".property-card"):
        price_el = card.select_one(".price")
        beds_el = card.select_one(".bedrooms")
        area_el = card.select_one(".area")
        neighborhood_el = card.select_one(".location")

        # Take the first run of digits so "Studio", "Beds: 3" and "10 beds"
        # all parse; indexing the first character crashed or truncated these.
        beds_match = re.search(r"\d+", beds_el.text) if beds_el else None

        listings.append({
            "city": city,
            "type": listing_type,
            "price": extract_price(price_el.text if price_el else "0"),
            "bedrooms": int(beds_match.group()) if beds_match else 0,
            "area_sqm": extract_number(area_el.text if area_el else "0"),
            "neighborhood": (
                neighborhood_el.text.strip() if neighborhood_el else ""
            ),
        })
    return listings
def extract_price(text):
    """Return the first price-like number in *text* as a float.

    Commas are treated as thousands separators and removed before matching,
    so "$1,250,000" parses as 1250000.0. Returns 0.0 (always a float) when
    the text contains no digits.
    """
    # After stripping commas the pattern only needs digits plus an optional
    # decimal part; every match starts with a digit, so float() cannot fail.
    match = re.search(r"\d+\.?\d*", text.replace(",", ""))
    return float(match.group()) if match else 0.0
def extract_number(text):
    """Return the first number in *text* as a float, or 0.0 if there is none.

    Accepts integers ("85"), decimals ("72.5") and a bare leading decimal
    point (".5"). Punctuation without digits is skipped.
    """
    # Each alternative requires at least one digit, so a stray "." (as in
    # "approx. 85 m²") or "1.2.3" can no longer reach float() and raise.
    match = re.search(r"\d+(?:\.\d*)?|\.\d+", text)
    return float(match.group()) if match else 0.0
Calculating Rental Yields by Neighborhood
def calculate_yields(cities):
    """Compute gross rental yields per city, neighborhood and bedroom count.

    For every city, rent and sale listings are scraped, grouped by
    (neighborhood, bedrooms) and reduced to the median price. Groups present
    in both sets get a gross yield of ``median_rent * 12 / median_sale * 100``.

    Args:
        cities: Iterable of city slugs passed to ``scrape_property_listings``.

    Returns:
        A DataFrame with the columns ``city``, ``neighborhood``, ``bedrooms``,
        ``avg_monthly_rent``, ``avg_sale_price`` and ``gross_yield_pct``. The
        ``avg_*`` columns hold medians; the names are kept for compatibility.
        The columns are present even when no rows were produced.
    """
    columns = [
        "city",
        "neighborhood",
        "bedrooms",
        "avg_monthly_rent",
        "avg_sale_price",
        "gross_yield_pct",
    ]
    group_keys = ["neighborhood", "bedrooms"]
    all_data = []

    for city in cities:
        rent_df = pd.DataFrame(scrape_property_listings(city, "rent"))
        sale_df = pd.DataFrame(scrape_property_listings(city, "sale"))

        # An empty scrape produces a frame with no columns, on which the
        # groupby below would raise KeyError; skip the city instead.
        if rent_df.empty or sale_df.empty:
            continue

        # The scraper stores 0 when a card has no price. Keeping those rows
        # drags medians down and can make the yield divide by zero.
        rent_df = rent_df[rent_df["price"] > 0]
        sale_df = sale_df[sale_df["price"] > 0]

        # Median price per neighborhood and bedroom count
        median_rent = rent_df.groupby(group_keys)["price"].median()
        median_sale = sale_df.groupby(group_keys)["price"].median()

        # Yield is only defined where both a rent and a sale median exist
        for (neighborhood, beds), monthly_rent in median_rent.items():
            if (neighborhood, beds) not in median_sale.index:
                continue
            sale_price = median_sale[(neighborhood, beds)]
            annual_rent = monthly_rent * 12
            gross_yield = (annual_rent / sale_price) * 100
            all_data.append({
                "city": city,
                "neighborhood": neighborhood,
                "bedrooms": beds,
                "avg_monthly_rent": monthly_rent,
                "avg_sale_price": sale_price,
                "gross_yield_pct": round(gross_yield, 2),
            })

    # Passing columns keeps the schema stable when nothing matched, so
    # callers can still sort or group by these names.
    return pd.DataFrame(all_data, columns=columns)
# Markets to compare; each slug is interpolated into the listing URL path.
cities = [
    "london",
    "berlin",
    "lisbon",
    "dubai",
    "bangkok",
]
yields = calculate_yields(cities)
# Show the 20 neighborhood/bedroom groups with the highest gross yield.
print(yields.sort_values(by="gross_yield_pct", ascending=False).head(n=20))
Market Comparison Analysis
import numpy as np
def market_comparison(yields_df):
    """Print a per-city yield comparison and the ten highest-yield groups.

    Args:
        yields_df: DataFrame with at least the columns ``city``,
            ``neighborhood``, ``avg_monthly_rent``, ``avg_sale_price`` and
            ``gross_yield_pct``.

    Returns:
        The per-city summary DataFrame (mean/median/std/min/max of the yield
        plus median sale price and rent, rounded to 2 decimals). An empty
        DataFrame is returned when *yields_df* has no rows.
    """
    print("\nGlobal Rental Yield Comparison:")
    print("=" * 60)

    # A frame with no rows may also have no columns, in which case the
    # groupby below would raise KeyError.
    if yields_df.empty:
        print(" (no yield data to compare)")
        return pd.DataFrame()

    summary = yields_df.groupby("city").agg({
        "gross_yield_pct": ["mean", "median", "std", "min", "max"],
        "avg_sale_price": "median",
        "avg_monthly_rent": "median",
    }).round(2)

    city_stats = yields_df.groupby("city")["gross_yield_pct"].agg(["mean", "std"])
    city_stats = city_stats.sort_values("mean", ascending=False)
    for city, row in city_stats.iterrows():
        mean, std = row["mean"], row["std"]
        # int() raises on NaN/inf, which appear when a city's yields are
        # missing or a sale price was zero; draw no bar in that case.
        bar = "█" * (int(mean * 2) if np.isfinite(mean) else 0)
        # std is NaN for a city with a single row.
        spread = f"±{std:.2f}" if np.isfinite(std) else "±n/a"
        print(f" {city:15s} {mean:5.2f}% ({spread}) {bar}")

    # Find best value neighborhoods
    print("\nTop 10 Highest-Yield Neighborhoods:")
    top = yields_df.nlargest(10, "gross_yield_pct")
    for _, row in top.iterrows():
        print(f" {row['city']}/{row['neighborhood']}: "
              f"{row['gross_yield_pct']:.2f}% "
              f"(${row['avg_monthly_rent']:,.0f}/mo, "
              f"${row['avg_sale_price']:,.0f} purchase)")

    # Previously computed and discarded; returning it lets callers reuse it.
    return summary
# Print the per-city yield summary and the top-10 groups for the scraped data.
market_comparison(yields)
Price-to-Rent Ratio Tracking
def price_to_rent_ratio(yields_df):
    """Add price-to-rent columns to *yields_df* and print city medians.

    The ratio is ``avg_sale_price / (avg_monthly_rent * 12)`` rounded to one
    decimal. Two columns are written to *yields_df* in place:
    ``price_to_rent`` and ``recommendation``.

    Args:
        yields_df: DataFrame with the columns ``city``, ``avg_sale_price``
            and ``avg_monthly_rent``. It is modified in place.

    Returns:
        The same DataFrame object, for convenience. A frame with no rows is
        returned untouched.
    """
    # A frame with no rows may also lack the price columns entirely.
    if yields_df.empty:
        return yields_df

    yields_df["price_to_rent"] = (
        yields_df["avg_sale_price"] /
        (yields_df["avg_monthly_rent"] * 12)
    ).round(1)

    # Classification
    def classify(ratio):
        # NaN/inf come from missing or zero rent. Every comparison with NaN
        # is False, so these used to fall through to "Rent favorable".
        if not np.isfinite(ratio):
            return "Insufficient data"
        if ratio < 15:
            return "Buy favorable"
        if ratio < 20:
            return "Neutral"
        return "Rent favorable"

    yields_df["recommendation"] = yields_df["price_to_rent"].apply(classify)

    city_avg = yields_df.groupby("city")["price_to_rent"].median()
    for city, ratio in city_avg.sort_values().items():
        rec = classify(ratio)
        print(f" {city}: {ratio:.1f}x ({rec})")

    return yields_df
# Adds price_to_rent and recommendation columns to `yields` in place and
# prints each city's median ratio.
price_to_rent_ratio(yields)
Scraping Infrastructure
- ScraperAPI renders JavaScript-heavy property portals and rotates proxies automatically
- ThorData gives you geo-targeted IPs for accessing region-locked property sites
- ScrapeOps tracks success rates across all your property scrapers
Conclusion
Scraping rental yields across global markets gives you an investing edge that normally costs thousands. Start with 2-3 cities you know well (to validate your data), then expand globally. Remember to cache results and respect rate limits — property sites update slowly, so daily scraping is usually sufficient.
Top comments (0)