Travel booking data is among the most valuable — and hardest to scrape — on the web. Booking.com, Expedia, and Kayak use aggressive anti-bot measures, dynamic pricing, and JavaScript-heavy interfaces. Here's how to extract travel pricing data reliably.
Why Travel Data Is Challenging
- Heavy JavaScript rendering (React/Next.js frontends)
- CAPTCHAs and bot detection (Akamai, PerimeterX, DataDome)
- Session-based pricing (cookies influence displayed prices)
- Dynamic content loaded via XHR/API calls
- Geo-dependent pricing (different prices by location)
Setting Up Robust Scraping
import requests
from bs4 import BeautifulSoup
import json
import time
from datetime import datetime, timedelta
# Replace with your own ScraperAPI key (found in the ScraperAPI dashboard).
API_KEY = "YOUR_SCRAPERAPI_KEY"
class TravelScraper:
    """Fetches travel-site pages through the ScraperAPI proxy service.

    ScraperAPI performs the JavaScript rendering and anti-bot bypasses
    (CAPTCHAs, fingerprinting) that booking sites rely on, so the caller
    only deals with final HTML.
    """

    def __init__(self, api_key):
        # ScraperAPI account key; see API_KEY above.
        self.api_key = api_key

    def scrape(self, url, country="us", render=True):
        """Fetch *url* via ScraperAPI and return it parsed as BeautifulSoup.

        Args:
            url: Target page to fetch.
            country: Two-letter country code for geo-targeted proxying.
            render: Whether ScraperAPI should execute JavaScript first.

        Returns:
            A BeautifulSoup tree of the rendered page.

        Raises:
            requests.HTTPError: if ScraperAPI answers with a non-2xx status
                (quota exhausted, blocked target, bad key, ...).
        """
        params = {
            "api_key": self.api_key,
            "url": url,
            # ScraperAPI expects string booleans, not Python bools.
            "render": "true" if render else "false",
            "country_code": country,
        }
        resp = requests.get(
            "https://api.scraperapi.com", params=params, timeout=90
        )
        # Bug fix: fail loudly on proxy/quota errors instead of silently
        # parsing an error page as if it were the target site's HTML.
        resp.raise_for_status()
        return BeautifulSoup(resp.text, "html.parser")
ScraperAPI handles JavaScript rendering and anti-bot bypasses that travel sites rely on.
Scraping Hotel Prices
import re
def scrape_hotel_prices(self, city, checkin, checkout, guests=2):
    """Scrape Booking.com search results for *city* between two dates.

    Returns a list of dicts with keys ``name``, ``price_per_night``,
    ``rating`` (None when the card shows no review score) and ``city``.
    """
    checkin_str = checkin.strftime("%Y-%m-%d")
    checkout_str = checkout.strftime("%Y-%m-%d")
    url = (
        f"https://www.booking.com/searchresults.html"
        f"?ss={city}&checkin={checkin_str}&checkout={checkout_str}"
        f"&group_adults={guests}&no_rooms=1&order=price"
    )
    soup = self.scrape(url)

    results = []
    for card in soup.find_all("div", {"data-testid": "property-card"}):
        title = card.find("div", {"data-testid": "title"})
        price = card.find("span", {"data-testid": "price-and-discounted-price"})
        if not (title and price):
            # Skip ad tiles / cards without a name or a visible price.
            continue
        score = card.find("div", {"data-testid": "review-score"})
        results.append({
            "name": title.get_text(strip=True),
            "price_per_night": parse_price(price.get_text(strip=True)),
            "rating": score.get_text(strip=True) if score else None,
            "city": city,
        })
    return results
def parse_price(text):
    """Extract the first numeric amount from a price string.

    Thousands separators are stripped before matching. Unlike the previous
    ``[\\d,]+`` pattern, the decimal fraction is preserved, so "$123.45"
    parses as 123.45 rather than 123.0.

    Args:
        text: Raw price text, e.g. "$1,234.56" or "from €99".

    Returns:
        The amount as a float, or None when *text* contains no digits.
    """
    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    return float(match.group()) if match else None
Scraping Flight Prices
def scrape_flights(self, origin, dest, date):
    """Scrape Kayak flight results for one route and date.

    Returns a list of dicts with ``price``, ``airline`` ("Unknown" when
    no carrier element is found), ``route`` and ``date``.
    """
    date_str = date.strftime("%Y-%m-%d")
    url = f"https://www.kayak.com/flights/{origin}-{dest}/{date_str}?sort=bestflight_a"
    soup = self.scrape(url)

    def class_contains(token):
        # Kayak uses hashed class names; match on a stable substring.
        return lambda c: c and token in str(c).lower()

    flights = []
    for row in soup.find_all("div", class_=lambda c: c and "resultInner" in c):
        price = row.find("span", class_=class_contains("price"))
        if price is None:
            continue  # rows without a price are layout chrome, not offers
        carrier = row.find("span", class_=class_contains("airline"))
        flights.append({
            "price": parse_price(price.get_text(strip=True)),
            "airline": carrier.get_text(strip=True) if carrier else "Unknown",
            "route": f"{origin} -> {dest}",
            "date": date_str,
        })
    return flights
Car Rental Price Comparison
def scrape_car_rentals(self, location, pickup_date, return_date):
    """Scrape Kayak car-rental offers for a location and date range.

    Returns a list of dicts with ``company`` ("Unknown" when no vendor
    element is found), ``total_price`` and ``location``.
    """
    url = "https://www.kayak.com/cars/{}/{}/{}".format(
        location,
        pickup_date.strftime("%Y-%m-%d"),
        return_date.strftime("%Y-%m-%d"),
    )
    soup = self.scrape(url)

    offers = []
    for card in soup.find_all("div", class_=lambda c: c and "resultCard" in str(c)):
        price = card.find("span", class_=lambda c: c and "price" in str(c).lower())
        if price is None:
            continue  # a card without a price is not a bookable offer
        vendor = card.find("span", class_=lambda c: c and "company" in str(c).lower())
        offers.append({
            "company": vendor.get_text(strip=True) if vendor else "Unknown",
            "total_price": parse_price(price.get_text(strip=True)),
            "location": location,
        })
    return offers
Geo-Pricing Detection
Travel prices vary by apparent location. Test by routing the same request through different countries with ScraperAPI's country_code parameter:
def detect_geo_pricing(scraper, url, countries):
    """Fetch *url* once per country code and map country -> parsed price.

    Countries whose page shows no price element are omitted from the
    result rather than recorded as None.
    """
    observed = {}
    for cc in countries:
        page = scraper.scrape(url, country=cc)
        tag = page.select_one("[data-testid='price-and-discounted-price']")
        if tag is not None:
            observed[cc] = parse_price(tag.get_text())
        time.sleep(3)  # be polite between geo-targeted requests
    return observed
# Demo: compare the same hotel page's displayed price across five proxy
# countries to reveal geo-dependent pricing.
geo_prices = detect_geo_pricing(
    TravelScraper(API_KEY),
    "https://www.booking.com/hotel/fr/example.html",
    ["us", "uk", "de", "in", "br"]
)
# NOTE(review): the "$" prefix is cosmetic -- each country's page may
# display a different local currency; confirm before comparing values.
for country, price in geo_prices.items():
    print(f"{country.upper()}: ${price}")
Price Tracking Over Time
import sqlite3
def track_travel_prices(db_path="travel_prices.db"):
    """Snapshot current Paris hotel prices into a local SQLite table.

    Creates the ``travel_prices`` table on first run; each call appends
    up to five rows stamped with the scrape time, so repeated runs
    accumulate a price history.

    Args:
        db_path: Path of the SQLite database file to create/append to.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS travel_prices (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                category TEXT, name TEXT, price REAL,
                route TEXT, travel_date TEXT, scraped_at TEXT
            )
        ''')
        scraper = TravelScraper(API_KEY)
        # Sample window: a 3-night stay starting 30 days out.
        checkin = datetime.now() + timedelta(days=30)
        checkout = checkin + timedelta(days=3)
        hotels = scraper.scrape_hotel_prices("Paris", checkin, checkout)
        for h in hotels[:5]:
            # Explicit column list instead of positional VALUES(NULL, ...),
            # so the INSERT survives future schema additions.
            conn.execute(
                "INSERT INTO travel_prices "
                "(category, name, price, route, travel_date, scraped_at) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                ("hotel", h["name"], h["price_per_night"], h["city"],
                 str(checkin.date()), datetime.utcnow().isoformat()),
            )
        conn.commit()
    finally:
        # Bug fix: the original never closed the connection, leaking the
        # file handle on every call.
        conn.close()


track_travel_prices()
Monitor scraper reliability with ScrapeOps — booking engines change layouts frequently.
Travel data scraping requires robust infrastructure, but the payoff is enormous. Price comparison tools, fare alerts, and travel analytics all depend on reliable extraction from booking engines.
Happy scraping!
Top comments (0)