DEV Community

agenthustler
agenthustler

Posted on

How to Scrape Booking Engines: Hotels, Flights, and Car Rentals

Travel booking data is among the most valuable — and hardest to scrape — on the web. Booking.com, Expedia, and Kayak use aggressive anti-bot measures, dynamic pricing, and JavaScript-heavy interfaces. Here's how to extract travel pricing data reliably.

Why Travel Data Is Challenging

  • Heavy JavaScript rendering (React/Next.js frontends)
  • CAPTCHAs and bot detection (Akamai, PerimeterX, DataDome)
  • Session-based pricing (cookies influence displayed prices)
  • Dynamic content loaded via XHR/API calls
  • Geo-dependent pricing (different prices by location)

Setting Up Robust Scraping

import requests
from bs4 import BeautifulSoup
import json
import time
from datetime import datetime, timedelta

# ScraperAPI credential handed to TravelScraper; replace the placeholder with a real key.
API_KEY = "YOUR_SCRAPERAPI_KEY"

class TravelScraper:
    """Fetch pages through the ScraperAPI endpoint and parse them as HTML."""

    def __init__(self, api_key):
        """Store the key that is sent with every request.

        Args:
            api_key: Credential passed as the ``api_key`` query parameter.
        """
        self.api_key = api_key

    def scrape(self, url, country="us", render=True):
        """Fetch ``url`` via ScraperAPI and return the parsed document.

        Args:
            url: Target page URL, forwarded as the ``url`` query parameter.
            country: Value sent as ``country_code``.
            render: When truthy ``render=true`` is sent, otherwise
                ``render=false``.

        Returns:
            A ``BeautifulSoup`` tree built from the response body with the
            ``html.parser`` backend.

        Raises:
            requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
            requests.RequestException: On connection failure or timeout.
        """
        params = {
            "api_key": self.api_key,
            "url": url,
            "render": "true" if render else "false",
            "country_code": country,
        }
        resp = requests.get(
            "https://api.scraperapi.com", params=params, timeout=90
        )
        # Fail loudly on error responses: parsing an error body as if it were
        # the target page would make every caller return empty results with
        # no hint that the request itself failed.
        resp.raise_for_status()
        return BeautifulSoup(resp.text, "html.parser")
Enter fullscreen mode Exit fullscreen mode

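ScraperAPI handles the JavaScript rendering that travel sites rely on and bypasses their anti-bot measures. The functions in the following sections take `self` as their first argument: add them to the `TravelScraper` class as methods so that calls such as `scraper.scrape_hotel_prices(...)` work.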

Scraping Hotel Prices

import re

def scrape_hotel_prices(self, city, checkin, checkout, guests=2):
    """Scrape Booking.com search results for hotels in ``city``.

    ``self`` must provide ``scrape(url)`` returning a parsed document
    (as ``TravelScraper.scrape`` does).

    Args:
        city: Free-text destination, sent as the ``ss`` query parameter.
        checkin: Object with ``strftime``; formatted as ``YYYY-MM-DD``.
        checkout: Object with ``strftime``; formatted as ``YYYY-MM-DD``.
        guests: Value sent as ``group_adults``.

    Returns:
        A list of dicts with the keys ``name``, ``price_per_night``,
        ``rating`` (``None`` when the card has no review score) and ``city``.
        Cards without both a title and a price element are skipped.
    """
    # Local import keeps this snippet usable without touching the file header.
    from urllib.parse import urlencode

    # urlencode escapes spaces, "&" and non-ASCII characters in the city name;
    # interpolating it raw would corrupt the query string for inputs such as
    # "New York" or "Paris & Lyon".
    query = urlencode({
        "ss": city,
        "checkin": checkin.strftime("%Y-%m-%d"),
        "checkout": checkout.strftime("%Y-%m-%d"),
        "group_adults": guests,
        "no_rooms": 1,
        "order": "price",
    })
    url = f"https://www.booking.com/searchresults.html?{query}"
    soup = self.scrape(url)
    hotels = []
    for card in soup.find_all("div", {"data-testid": "property-card"}):
        name_el = card.find("div", {"data-testid": "title"})
        price_el = card.find("span", {"data-testid": "price-and-discounted-price"})
        rating_el = card.find("div", {"data-testid": "review-score"})
        if name_el and price_el:
            hotels.append({
                "name": name_el.get_text(strip=True),
                "price_per_night": parse_price(price_el.get_text(strip=True)),
                "rating": rating_el.get_text(strip=True) if rating_el else None,
                "city": city,
            })
    return hotels

def parse_price(text):
    """Extract the first numeric amount from a price string.

    Commas are treated as thousands separators and removed; a fractional
    part introduced by ``.`` is kept, so ``"$1,234.56"`` yields ``1234.56``.

    Args:
        text: Price text such as ``"US$120"``.

    Returns:
        The first number found as a ``float``, or ``None`` when the text
        contains no digits.
    """
    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    return float(match.group()) if match else None
Enter fullscreen mode Exit fullscreen mode

Scraping Flight Prices

def scrape_flights(self, origin, dest, date):
    """Collect flight offers from a Kayak results page.

    ``self`` must provide ``scrape(url)`` returning a parsed document.

    Args:
        origin: Origin code placed in the URL path.
        dest: Destination code placed in the URL path.
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``price``, ``airline`` (``"Unknown"``
        when no airline element is found), ``route`` and ``date``. Results
        without a price element are left out.
    """
    def class_contains(fragment):
        # Case-insensitive substring match against a class attribute value.
        return lambda c: c and fragment in str(c).lower()

    date_str = date.strftime("%Y-%m-%d")
    page = self.scrape(
        f"https://www.kayak.com/flights/{origin}-{dest}/{date_str}?sort=bestflight_a"
    )
    route = f"{origin} -> {dest}"

    offers = []
    for node in page.find_all("div", class_=lambda c: c and "resultInner" in c):
        price_node = node.find("span", class_=class_contains("price"))
        airline_node = node.find("span", class_=class_contains("airline"))
        if not price_node:
            continue
        if airline_node:
            airline = airline_node.get_text(strip=True)
        else:
            airline = "Unknown"
        offers.append({
            "price": parse_price(price_node.get_text(strip=True)),
            "airline": airline,
            "route": route,
            "date": date_str,
        })
    return offers
Enter fullscreen mode Exit fullscreen mode

Car Rental Price Comparison

def scrape_car_rentals(self, location, pickup_date, return_date):
    """Collect car-rental offers from a Kayak results page.

    ``self`` must provide ``scrape(url)`` returning a parsed document.

    Args:
        location: Location segment placed in the URL path.
        pickup_date: Object with ``strftime``; formatted as ``YYYY-MM-DD``.
        return_date: Object with ``strftime``; formatted as ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``company`` (``"Unknown"`` when no
        company element is found), ``total_price`` and ``location``. Cards
        without a price element are left out.
    """
    start, end = (d.strftime("%Y-%m-%d") for d in (pickup_date, return_date))
    page = self.scrape(f"https://www.kayak.com/cars/{location}/{start}/{end}")

    def is_card(c):
        return c and "resultCard" in str(c)

    def is_company(c):
        return c and "company" in str(c).lower()

    def is_price(c):
        return c and "price" in str(c).lower()

    offers = []
    for node in page.find_all("div", class_=is_card):
        company_node = node.find("span", class_=is_company)
        price_node = node.find("span", class_=is_price)
        if not price_node:
            continue
        company = company_node.get_text(strip=True) if company_node else "Unknown"
        offers.append({
            "company": company,
            "total_price": parse_price(price_node.get_text(strip=True)),
            "location": location,
        })
    return offers
Enter fullscreen mode Exit fullscreen mode

Geo-Pricing Detection

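Travel prices vary by the visitor's apparent location. To test this, request the same page through geo-targeted proxies (for example via ThorData) and compare the results; the example below does so by passing a different `country` to the scraper for each request: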

def detect_geo_pricing(scraper, url, countries, delay=3):
    """Fetch one page from several countries and collect the displayed price.

    Args:
        scraper: Object providing ``scrape(url, country=...)`` that returns a
            parsed document supporting ``select_one``.
        url: Page to request for every country.
        countries: Iterable of country codes passed through to ``scrape``.
        delay: Seconds to wait between consecutive requests. Defaults to 3;
            pass ``0`` to disable the pause.

    Returns:
        A dict mapping each country code to the parsed price. Countries whose
        page has no ``price-and-discounted-price`` element are omitted.
    """
    prices_by_country = {}
    for index, country in enumerate(countries):
        # Pause only between requests: sleeping after the final one would
        # just delay the return without throttling anything.
        if index and delay:
            time.sleep(delay)
        soup = scraper.scrape(url, country=country)
        price_el = soup.select_one("[data-testid='price-and-discounted-price']")
        if price_el:
            prices_by_country[country] = parse_price(price_el.get_text())
    return prices_by_country

# Compare the price shown for one example listing across five country codes.
geo_prices = detect_geo_pricing(
    TravelScraper(API_KEY),
    "https://www.booking.com/hotel/fr/example.html",
    ["us", "uk", "de", "in", "br"]
)
# NOTE(review): "$" is printed for every country; presumably the displayed
# currency can differ by location — confirm before comparing the raw numbers.
for country, price in geo_prices.items():
    print(f"{country.upper()}: ${price}")
Enter fullscreen mode Exit fullscreen mode

Price Tracking Over Time

import sqlite3

def track_travel_prices(db_path="travel_prices.db"):
    """Scrape Paris hotel prices and append the first five to a SQLite table.

    Creates the ``travel_prices`` table when missing, searches for a 3-night
    stay starting 30 days from now, and stores up to five results. All rows of
    one run share the same ``scraped_at`` timestamp (naive UTC, ISO format).

    Args:
        db_path: Path of the SQLite database file.
    """
    # Local import: the file header only brings in datetime and timedelta.
    from datetime import timezone

    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS travel_prices (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                category TEXT, name TEXT, price REAL,
                route TEXT, travel_date TEXT, scraped_at TEXT
            )
        ''')
        scraper = TravelScraper(API_KEY)
        checkin = datetime.now() + timedelta(days=30)
        checkout = checkin + timedelta(days=3)
        hotels = scraper.scrape_hotel_prices("Paris", checkin, checkout)

        travel_date = str(checkin.date())
        # Same naive-UTC ISO string that utcnow().isoformat() produced, but
        # without calling the deprecated datetime.utcnow().
        scraped_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        rows = [
            ("hotel", h["name"], h["price_per_night"], h["city"],
             travel_date, scraped_at)
            for h in hotels[:5]
        ]
        conn.executemany(
            "INSERT INTO travel_prices VALUES (NULL, ?, ?, ?, ?, ?, ?)",
            rows,
        )
        conn.commit()
    finally:
        # Release the database handle even when scraping or inserting fails.
        conn.close()

# Run one tracking pass against the default database path when this script executes.
track_travel_prices()
Enter fullscreen mode Exit fullscreen mode

Monitor scraper reliability with ScrapeOps — booking engines change layouts frequently.


Travel data scraping requires robust infrastructure, but the payoff is enormous. Price comparison tools, fare alerts, and travel analytics all depend on reliable extraction from booking engines.

Happy scraping!

Top comments (0)