Airbnb data is incredibly valuable for travel analytics, competitive pricing, and real estate market research. In this guide, we'll extract listing prices, reviews, and availability using Python.
Why Extract Airbnb Data?
- Property managers: Monitor competitor pricing in your area
- Travel analysts: Track seasonal price trends across destinations
- Real estate investors: Assess short-term rental potential for properties
- Market researchers: Understand supply and demand patterns
Setting Up the Scraper
Airbnb is a React-based SPA, so we need a browser automation tool:
from playwright.sync_api import sync_playwright
import json
import time
import random
import csv
from dataclasses import dataclass, asdict, field
from typing import List, Optional
@dataclass
class AirbnbListing:
    """A single Airbnb search-result listing.

    String fields hold the raw display text as scraped from the page
    (e.g. the price keeps its currency symbol); fields that are not
    available on the search-results page are left as None.
    """

    title: str  # listing card title ("N/A" when the element is missing)
    price_per_night: str  # raw price text as shown on the card
    total_price: Optional[str]  # not populated by the search scrape
    rating: Optional[str]  # raw rating text; None when absent
    review_count: Optional[int]  # not populated by the search scrape
    property_type: str  # first line of the card subtitle
    beds: Optional[str]  # not populated by the search scrape
    location: str  # full card subtitle text
    url: str  # absolute listing URL ("" when no link found)
    amenities: List[str] = field(default_factory=list)  # defaults to empty list
Scraping Search Results
def scrape_airbnb_search(location, checkin, checkout, adults=2):
    """Scrape Airbnb search results for a destination.

    Args:
        location: Destination used in the search URL path.
        checkin: Check-in date string (YYYY-MM-DD).
        checkout: Check-out date string (YYYY-MM-DD).
        adults: Number of adult guests (default 2).

    Returns:
        A list of AirbnbListing objects parsed from the results page.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/120.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            # Build search URL
            search_url = (
                f"https://www.airbnb.com/s/{location}/homes"
                f"?checkin={checkin}&checkout={checkout}"
                f"&adults={adults}"
            )
            page.goto(search_url, wait_until="networkidle")
            # Randomized delay so request timing looks less bot-like
            time.sleep(random.uniform(3, 6))

            # Close any popups (best-effort; narrow except instead of a
            # bare `except:` that would also swallow KeyboardInterrupt)
            try:
                close_btn = page.query_selector('button[aria-label="Close"]')
                if close_btn:
                    close_btn.click()
                    time.sleep(1)
            except Exception:
                pass

            return extract_listings(page)
        finally:
            # Always release the browser, even if popup handling or
            # extraction raises — the original leaked it on any error.
            browser.close()
def extract_listings(page):
    """Extract listing data from the search results page.

    Returns a list of AirbnbListing objects; cards that fail to parse
    are reported to stdout and skipped.
    """
    results = []
    for card in page.query_selector_all('div[itemprop="itemListElement"]'):
        try:
            results.append(_parse_listing_card(card))
        except Exception as exc:
            print(f"Error parsing listing: {exc}")
    return results


def _parse_listing_card(card):
    """Build one AirbnbListing from a single search-result card element."""
    # Title
    title_node = card.query_selector('div[data-testid="listing-card-title"]')
    title = title_node.inner_text() if title_node else "N/A"

    # Price (class-based selector; Airbnb's hashed class names change often)
    price_node = card.query_selector('span._14y1168')
    price = price_node.inner_text() if price_node else "N/A"

    # Rating
    rating_node = card.query_selector('span.r1dod31s')
    rating = rating_node.inner_text() if rating_node else None

    # URL — relative hrefs are made absolute
    anchor = card.query_selector('a[href*="/rooms/"]')
    href = anchor.get_attribute("href") if anchor else ""
    if href and not href.startswith("http"):
        href = f"https://www.airbnb.com{href}"

    # Property type and beds come from the card subtitle
    subtitle_node = card.query_selector('div[data-testid="listing-card-subtitle"]')
    subtitle = subtitle_node.inner_text() if subtitle_node else ""

    return AirbnbListing(
        title=title,
        price_per_night=price,
        total_price=None,
        rating=rating,
        review_count=None,
        property_type=subtitle.split("\n")[0] if subtitle else "N/A",
        beds=None,
        location=subtitle,
        url=href,
    )
Extracting Individual Listing Details
def scrape_listing_details(page, listing_url):
    """Get detailed information from an individual listing page.

    Navigates `page` to `listing_url` and returns a dict with keys
    'title', 'amenities', and 'reviews' (always present) plus 'host'
    (only when the host-overview section is found).
    """
    page.goto(listing_url, wait_until="networkidle")
    time.sleep(random.uniform(2, 5))

    details = {}

    # Full title
    heading = page.query_selector('h1')
    details['title'] = heading.inner_text() if heading else None

    # Host info — key is only set when the section exists
    host_section = page.query_selector('div[data-section-id="HOST_OVERVIEW"]')
    if host_section:
        details['host'] = host_section.inner_text()

    details['amenities'] = _collect_amenities(page)

    # Reviews
    details['reviews'] = extract_listing_reviews(page)
    return details


def _collect_amenities(page):
    """Open the amenities modal (if present), scrape its rows, close it."""
    show_button = page.query_selector('button:has-text("Show all amenities")')
    if not show_button:
        return []
    show_button.click()
    time.sleep(2)
    rows = page.query_selector_all('div[data-testid="amenity-row"]')
    names = [row.inner_text() for row in rows]
    # Close modal so later scraping sees the main page again
    close = page.query_selector('button[aria-label="Close"]')
    if close:
        close.click()
    return names
Extracting Reviews
def extract_listing_reviews(page, max_reviews=20):
    """Extract up to `max_reviews` reviews from a listing page.

    Each review is a dict with 'author', 'date', and 'text' keys;
    missing elements yield None values.
    """
    # Expand the full review list when a "Show all ... reviews" button exists
    expand = page.query_selector('button:has-text("Show all")')
    if expand and "review" in (expand.inner_text() or "").lower():
        expand.click()
        time.sleep(3)

    # Map output keys to the selector that locates each piece of the review
    field_selectors = (
        ("author", "h3"),
        ("date", "li"),
        ("text", 'span[data-testid="review-text"]'),
    )

    collected = []
    for container in page.query_selector_all('div[data-review-id]')[:max_reviews]:
        entry = {}
        for key, selector in field_selectors:
            node = container.query_selector(selector)
            entry[key] = node.inner_text() if node else None
        collected.append(entry)
    return collected
Price Tracking Over Time
import sqlite3
from datetime import datetime
def setup_price_db(db_path="airbnb_prices.db"):
    """Open (and initialize, if needed) the price-history SQLite database.

    Returns an open sqlite3 connection with the price_history table
    guaranteed to exist.
    """
    connection = sqlite3.connect(db_path)
    ddl = (
        "CREATE TABLE IF NOT EXISTS price_history ("
        " listing_url TEXT,"
        " price TEXT,"
        " checkin TEXT,"
        " checkout TEXT,"
        " scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP"
        ")"
    )
    connection.execute(ddl)
    connection.commit()
    return connection
def track_price(conn, listing_url, price, checkin, checkout):
    """Record one observed price point for a listing/date combination.

    `scraped_at` is filled in by the table's CURRENT_TIMESTAMP default.
    """
    row = (listing_url, price, checkin, checkout)
    conn.execute(
        "INSERT INTO price_history (listing_url, price, checkin, checkout) "
        "VALUES (?, ?, ?, ?)",
        row,
    )
    conn.commit()
def get_price_history(conn, listing_url):
    """Return (price, checkin, scraped_at) rows for a listing, oldest first."""
    query = (
        "SELECT price, checkin, scraped_at FROM price_history "
        "WHERE listing_url = ? ORDER BY scraped_at"
    )
    return conn.execute(query, (listing_url,)).fetchall()
Complete Pipeline
if __name__ == "__main__":
    listings = scrape_airbnb_search(
        location="Barcelona",
        checkin="2026-06-01",
        checkout="2026-06-07",
        adults=2,
    )
    print(f"Found {len(listings)} listings")

    # Export to CSV — guard against an empty result set: the original
    # indexed listings[0] unconditionally and raised IndexError when the
    # search returned nothing (e.g. blocked by bot detection).
    if listings:
        with open("airbnb_listings.csv", "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=asdict(listings[0]).keys())
            writer.writeheader()
            for listing in listings:
                writer.writerow(asdict(listing))
        print("Data exported to airbnb_listings.csv")
    else:
        print("No listings found; nothing to export.")
Handling Anti-Scraping
Airbnb uses aggressive bot detection. For reliable scraping at scale, proxy rotation is essential. ScraperAPI provides managed proxy infrastructure with automatic rotation and JavaScript rendering support.
Conclusion
Airbnb data extraction opens up valuable insights for property management, travel analytics, and real estate research. Remember to respect rate limits, use proper delays between requests, and consider the legal implications of scraping in your jurisdiction.
Top comments (0)