TripAdvisor hosts 1 billion+ reviews across hotels, restaurants, attractions, and experiences. For travel, hospitality, and competitive research, this is one of the richest review datasets available. Here's how to extract it.
What's accessible
TripAdvisor public pages include:
- Business name, category, location
- Overall rating, total review count, and sub-ratings by category (service, value, cleanliness, etc.)
- Individual reviews: text, rating, reviewer location, date, trip type
- Business responses to reviews
- Photos
- Ranking within category ("#3 of 847 Hotels in Paris")
- Price range and amenities
Method 1: Web scraping with Playwright
TripAdvisor uses aggressive bot detection. Use browser automation with stealth:
from playwright.async_api import async_playwright
import asyncio
async def scrape_tripadvisor_reviews(url: str, max_reviews: int = 100) -> list:
    """Collect review cards from a TripAdvisor listing, following pagination.

    Args:
        url: Address of the first review page to load.
        max_reviews: Upper bound on the number of reviews returned.

    Returns:
        A list of at most ``max_reviews`` dicts with the keys ``title``,
        ``rating``, ``text``, ``date``, ``reviewer_location`` and
        ``trip_type``. Each value is the ``innerText`` of the matching
        element inside the card; a card without that element carries no
        text for the key.

    Raises:
        Whatever Playwright raises while navigating or waiting for the
        review cards (for example when no card appears within 15 seconds).
        The browser is closed before the exception propagates.
    """
    # Local import: the block stays self-contained wherever it is pasted.
    from urllib.parse import urljoin

    all_reviews: list = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--disable-blink-features=AutomationControlled"],
        )
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/122.0.0.0",
                viewport={"width": 1280, "height": 800},
            )
            page = await context.new_page()
            current_url = url

            while len(all_reviews) < max_reviews:
                await page.goto(current_url)
                await page.wait_for_selector('[data-automation="reviewCard"]', timeout=15000)

                reviews = await page.evaluate("""
                    Array.from(document.querySelectorAll('[data-automation="reviewCard"]')).map(card => ({
                        title: card.querySelector('[data-automation="reviewTitle"]')?.innerText,
                        rating: card.querySelector('[data-automation="bubbleRatingValue"]')?.innerText,
                        text: card.querySelector('[data-automation="reviewBody"] span')?.innerText,
                        date: card.querySelector('[data-automation="reviewDate"]')?.innerText,
                        reviewer_location: card.querySelector('[data-automation="reviewerLocation"]')?.innerText,
                        trip_type: card.querySelector('[data-automation="tripType"]')?.innerText,
                    }))
                """)
                all_reviews.extend(reviews)

                # Stop once enough reviews are collected or there is no next page.
                next_btn = await page.query_selector('[data-automation="page-next"]')
                if not next_btn or len(all_reviews) >= max_reviews:
                    break

                next_href = await next_btn.get_attribute("href")
                if not next_href:
                    break

                # urljoin resolves relative hrefs against the page just loaded
                # and leaves absolute hrefs untouched, unlike plain concatenation.
                current_url = urljoin(page.url, next_href)
        finally:
            # Release the browser even when a page load or selector wait fails.
            await browser.close()

    return all_reviews[:max_reviews]
# Pull up to 200 reviews from a single hotel listing and report the count.
hotel_page = "https://www.tripadvisor.com/Hotel_Review-g187147-d188603-Reviews-Hotel_Le_Marais-Paris_Ile_de_France.html"
reviews = asyncio.run(scrape_tripadvisor_reviews(hotel_page, max_reviews=200))
print(f"Scraped {len(reviews)} reviews")
Method 2: TripAdvisor Content API (official)
TripAdvisor has an official Content API. It has a limited free tier and requires approval:
import requests
# Apply at developers.tripadvisor.com
# Placeholder value: replace it with your own key. The request helpers in this
# section send it as the "key" query parameter.
TA_API_KEY = "your_api_key"
def get_location_details(location_id: str, timeout: float = 10.0) -> dict:
    """Fetch the details record for one location from the Content API.

    Args:
        location_id: Identifier placed in the ``/location/{id}/details`` path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body when the response status is 200, otherwise an
        empty dict.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout elapses.
    """
    url = f"https://api.content.tripadvisor.com/api/v1/location/{location_id}/details"
    params = {
        "key": TA_API_KEY,
        "language": "en",
        "currency": "USD",
    }
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        return {}
    return response.json()
def get_location_reviews(location_id: str, limit: int = 5, timeout: float = 10.0) -> list:
    """Fetch reviews for one location from the Content API.

    Args:
        location_id: Identifier placed in the ``/location/{id}/reviews`` path.
        limit: Value sent as the ``limit`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The ``data`` entry of the JSON body when the response status is 200
        (an empty list if that entry is absent), otherwise an empty list.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout elapses.
    """
    url = f"https://api.content.tripadvisor.com/api/v1/location/{location_id}/reviews"
    params = {
        "key": TA_API_KEY,
        "language": "en",
        "limit": limit,
    }
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        return []
    return response.json().get("data", [])
def search_locations(query: str, category: str = "hotels", timeout: float = 10.0) -> list:
    """Search the Content API for locations matching a free-text query.

    Args:
        query: Text sent as the ``searchQuery`` parameter.
        category: Value sent as the ``category`` parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The ``data`` entry of the JSON body when the response status is 200
        (an empty list if that entry is absent), otherwise an empty list.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout elapses.
    """
    url = "https://api.content.tripadvisor.com/api/v1/location/search"
    params = {
        "key": TA_API_KEY,
        "searchQuery": query,
        "category": category,
        "language": "en",
    }
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        return []
    return response.json().get("data", [])
# Demo: look up Paris hotels, then show a headline and two review titles
# for each of the first three matches.
hotels = search_locations("Paris hotels", "hotels")
first_three = hotels[:3]
for match in first_three:
    loc_id = match["location_id"]
    details = get_location_details(loc_id)
    reviews = get_location_reviews(loc_id)

    print(f"{details.get('name')}: {details.get('rating')} ({details.get('num_reviews')} reviews)")

    sample = reviews[:2]
    for r in sample:
        print(f" - {r.get('title')} ({r.get('rating')}/5)")
Method 3: Pre-built TripAdvisor scraper
The TripAdvisor Reviews Scraper on Apify handles pagination, bot detection, and multi-page extraction automatically.
Input: TripAdvisor location URL or search query
Output: structured reviews with sentiment indicators
40+ production runs. Pay-per-result pricing.
Analyzing review data
Once you have reviews, extract signals:
import pandas as pd
from textblob import TextBlob
def analyze_reviews(reviews: list) -> dict:
    """Summarise review records into headline statistics.

    Args:
        reviews: Review dicts. Each is expected to carry a ``"text"`` and a
            ``"rating"`` entry; either may be missing, empty or non-numeric.

    Returns:
        A dict with these keys:

        * ``total_reviews`` - number of records received.
        * ``avg_sentiment`` - mean TextBlob polarity; records without text
          count as 0.
        * ``rating_distribution`` - occurrences of each raw rating value.
        * ``negative_review_count`` - records rated 2 or lower that also
          have a text value.
        * ``avg_rating`` - mean of the ratings that parse as numbers.

        Both averages are NaN when there is nothing to average.
    """
    if not reviews:
        # An empty frame has no columns, so the lookups below would raise.
        return {
            "total_reviews": 0,
            "avg_sentiment": float("nan"),
            "rating_distribution": {},
            "negative_review_count": 0,
            "avg_rating": float("nan"),
        }

    df = pd.DataFrame(reviews)
    # Records may omit a field entirely; treat that as "no value".
    for column in ("text", "rating"):
        if column not in df.columns:
            df[column] = None

    # Sentiment analysis. Missing values are detected with isna() because a
    # NaN text is truthy and would otherwise be scored as the string "nan".
    texts = df["text"]
    df["sentiment"] = [
        0.0 if missing or not text else TextBlob(str(text)).sentiment.polarity
        for text, missing in zip(texts, texts.isna())
    ]

    # Rating distribution (raw values, exactly as received)
    rating_dist = df["rating"].value_counts().to_dict()

    # Convert once; values that are not numbers become NaN instead of raising.
    numeric_rating = pd.to_numeric(df["rating"], errors="coerce")

    # Texts of negative reviews (rating of 2 or lower)
    negative = df.loc[numeric_rating <= 2, "text"].dropna()

    return {
        "total_reviews": len(df),
        "avg_sentiment": df["sentiment"].mean(),
        "rating_distribution": rating_dist,
        "negative_review_count": len(negative),
        "avg_rating": numeric_rating.mean(),
    }
# Run the summary over the collected reviews and print one line per metric.
analysis = analyze_reviews(reviews)
report_lines = (
    f"Avg rating: {analysis['avg_rating']:.2f}",
    f"Avg sentiment: {analysis['avg_sentiment']:.2f}",
    f"Distribution: {analysis['rating_distribution']}",
)
for report_line in report_lines:
    print(report_line)
Use cases
- Competitive hotel/restaurant analysis: Compare your ratings vs competitors
- Guest sentiment tracking: Monitor what guests complain about over time
- Market research: Analyze hotel/restaurant quality by neighborhood or price tier
- Travel app: Build "best reviewed" listings for a destination
- Brand monitoring: Track mentions across hospitality venues
Anti-detection notes
TripAdvisor uses Imperva/Incapsula protection. At scale you need:
- Residential proxies (required — datacenter IPs are blocked instantly)
- Realistic request timing (3-8 second delays)
- Session cookies from browser warm-up
- Vary viewport sizes
For small research projects (< 500 reviews), the Playwright approach works. For 10,000+ reviews, use the managed actor with proxy rotation built in.
n8n AI Automation Pack ($39) — 5 production-ready workflows
Skip the setup
Apify Scrapers Bundle — $29 one-time
Includes TripAdvisor Reviews Scraper, Yelp Business Scraper, Restaurant Menu Scraper, and 32+ more.
Top comments (0)