Trustpilot hosts over 200 million reviews for businesses worldwide. This data is invaluable for reputation monitoring, competitive analysis, and customer sentiment research. Here's how to extract Trustpilot reviews programmatically with Python.
Why Scrape Trustpilot?
- Reputation monitoring: Track your brand's reviews automatically
- Competitive analysis: Compare review scores across competitors
- Sentiment analysis: Understand what customers love and hate
- Market research: Evaluate business quality in any industry
- Lead generation: Find businesses with poor reviews (opportunity!)
Extracting Business Reviews
import requests
from bs4 import BeautifulSoup
import json
import time
# Default HTTP headers passed to requests.get() by the scraping functions
# in this file; the User-Agent mimics a desktop browser.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36"
    ),
}
def _parse_rating(value):
    """Coerce a rating to ``int``; return ``None`` if it is missing or malformed."""
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def _iter_jsonld_nodes(data):
    """Yield every dict in a JSON-LD payload, descending into lists and ``@graph``."""
    if isinstance(data, list):
        for item in data:
            yield from _iter_jsonld_nodes(item)
    elif isinstance(data, dict):
        yield data
        yield from _iter_jsonld_nodes(data.get("@graph"))


def _reviews_from_jsonld(soup):
    """Extract reviews from the page's ``LocalBusiness`` JSON-LD blocks."""
    reviews = []
    for script in soup.find_all("script", {"type": "application/ld+json"}):
        raw = script.string
        if not raw:
            # Empty <script> tags have no string; json.loads(None) would raise.
            continue
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            continue
        for node in _iter_jsonld_nodes(data):
            if node.get("@type") != "LocalBusiness":
                continue
            items = node.get("review") or []
            if isinstance(items, dict):
                # A single review may be emitted as an object instead of a list.
                items = [items]
            for review in items:
                if not isinstance(review, dict):
                    continue
                author = review.get("author")
                rating = review.get("reviewRating")
                reviews.append({
                    "author": author.get("name", "") if isinstance(author, dict) else "",
                    "rating": _parse_rating(
                        rating.get("ratingValue") if isinstance(rating, dict) else None
                    ),
                    "date": review.get("datePublished", ""),
                    "title": review.get("headline", ""),
                    "body": review.get("reviewBody", ""),
                })
    return reviews


def _reviews_from_html(soup):
    """Extract reviews from the rendered review cards (fallback path)."""
    reviews = []
    for card in soup.select("[data-review-id]"):
        rating_el = card.select_one("[data-service-review-rating]")
        title_el = card.select_one("[data-service-review-title-typography]")
        body_el = card.select_one("[data-service-review-text-typography]")
        author_el = card.select_one("[data-consumer-name-typography]")
        date_el = card.select_one("time")
        reviews.append({
            "author": author_el.text.strip() if author_el else "",
            "rating": (
                _parse_rating(rating_el["data-service-review-rating"])
                if rating_el else None
            ),
            # .get() avoids a KeyError when <time> has no datetime attribute.
            "date": date_el.get("datetime", "") if date_el else "",
            "title": title_el.text.strip() if title_el else "",
            "body": body_el.text.strip() if body_el else "",
        })
    return reviews


def scrape_business_reviews(business_slug, pages=5):
    """Scrape reviews for a business on Trustpilot.

    Args:
        business_slug: Identifier used in the review URL, e.g. ``"example.com"``.
        pages: Number of result pages to request, starting at page 1.

    Returns:
        A list of dicts with the keys ``author``, ``rating``, ``date``,
        ``title`` and ``body``. ``rating`` is an ``int`` or ``None`` when it
        could not be parsed. Scraping stops at the first page that does not
        return HTTP 200, and whatever was collected so far is returned.
    """
    all_reviews = []
    for page in range(1, pages + 1):
        url = f"https://www.trustpilot.com/review/{business_slug}?page={page}"
        # A timeout keeps a stalled connection from hanging the scraper forever.
        response = requests.get(url, headers=HEADERS, timeout=30)
        if response.status_code != 200:
            print(f"Error on page {page}: {response.status_code}")
            break
        soup = BeautifulSoup(response.text, "html.parser")
        # Prefer JSON-LD structured data; fall back to HTML parsing per page,
        # so a page without JSON-LD is still scraped even when earlier pages
        # already produced reviews.
        page_reviews = _reviews_from_jsonld(soup) or _reviews_from_html(soup)
        all_reviews.extend(page_reviews)
        print(f"Page {page}: {len(all_reviews)} total reviews collected")
        if page < pages:
            # Rate limit between requests; no need to wait after the last page.
            time.sleep(2)
    return all_reviews
Getting Business Overview
def _jsonld_candidates(data):
    """Return the dict nodes of a JSON-LD payload (top level, list items, ``@graph``)."""
    if isinstance(data, list):
        return [node for node in data if isinstance(node, dict)]
    if isinstance(data, dict):
        graph = data.get("@graph")
        nested = [node for node in graph if isinstance(node, dict)] if isinstance(graph, list) else []
        return [data, *nested]
    return []


def get_business_info(business_slug):
    """Extract business summary from Trustpilot.

    Args:
        business_slug: Identifier used in the review URL, e.g. ``"example.com"``.

    Returns:
        A dict with the keys ``name``, ``url``, ``rating`` (float),
        ``review_count`` (int), ``best_rating`` and ``trustpilot_url``, taken
        from the first JSON-LD node carrying a usable ``aggregateRating``.
        ``None`` when the page does not return HTTP 200 or no such node exists.
    """
    url = f"https://www.trustpilot.com/review/{business_slug}"
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, headers=HEADERS, timeout=30)
    if response.status_code != 200:
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    # Extract from JSON-LD
    for script in soup.find_all("script", {"type": "application/ld+json"}):
        try:
            # script.string is None for empty tags; "" makes that a decode error.
            data = json.loads(script.string or "")
        except json.JSONDecodeError:
            continue
        for node in _jsonld_candidates(data):
            agg = node.get("aggregateRating")
            if not isinstance(agg, dict):
                continue
            try:
                # "or 0" also covers keys that are present but null.
                rating = float(agg.get("ratingValue") or 0)
                review_count = int(agg.get("reviewCount") or 0)
            except (TypeError, ValueError):
                continue
            return {
                "name": node.get("name", ""),
                "url": node.get("url", ""),
                "rating": rating,
                "review_count": review_count,
                "best_rating": agg.get("bestRating", 5),
                "trustpilot_url": url,
            }
    return None
Comparing Competitors
def compare_businesses(slugs):
    """Compare Trustpilot ratings across multiple businesses.

    Args:
        slugs: Iterable of business slugs as used in Trustpilot review URLs.

    Returns:
        A list of business-info dicts (as returned by ``get_business_info``),
        each extended with ``rating_distribution`` (star value -> count over
        the scraped reviews) and ``recent_reviews`` (number of reviews
        scraped), sorted by ``rating`` in descending order. Businesses for
        which no info could be fetched are omitted.
    """
    results = []
    for slug in slugs:
        info = get_business_info(slug)
        if info:
            reviews = scrape_business_reviews(slug, pages=3)
            # Calculate distribution
            distribution = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
            for review in reviews:
                # Ratings may arrive as strings (e.g. "5") or be missing;
                # normalise so they are counted instead of silently dropped.
                try:
                    stars = int(float(review.get("rating")))
                except (TypeError, ValueError, OverflowError):
                    continue
                if stars in distribution:
                    distribution[stars] += 1
            info["rating_distribution"] = distribution
            info["recent_reviews"] = len(reviews)
            results.append(info)
        # Pause between businesses to keep the request rate low.
        time.sleep(3)
    # Sort by rating
    return sorted(results, key=lambda x: x["rating"], reverse=True)
# Example: rank three cloud hosting providers by their Trustpilot rating
# and print one summary line per business, best-rated first.
providers = [
    "aws.amazon.com",
    "cloud.google.com",
    "azure.microsoft.com",
]
comparison = compare_businesses(providers)
for biz in comparison:
    print(
        f"{biz['name']}: {biz['rating']}/5 ({biz['review_count']} reviews)"
    )
Sentiment Analysis
from collections import Counter
def analyze_review_sentiment(reviews):
    """Analyze sentiment patterns in reviews.

    Reviews are bucketed by star rating: 4 and above is positive, 2 and below
    is negative, exactly 3 is neutral. Reviews whose rating is missing or
    cannot be parsed as a number are left out of all three buckets instead of
    being counted as negative.

    Args:
        reviews: List of review dicts with optional ``rating`` and ``body`` keys.

    Returns:
        A dict with ``total_reviews``, ``positive_count``, ``negative_count``,
        ``neutral_count``, ``positive_pct`` (share of all reviews, one decimal;
        0 for empty input) and ``positive_keywords`` / ``negative_keywords``
        (up to 15 ``(word, count)`` pairs each).
    """

    def rating_of(review):
        """Return the review's rating as a float, or None if unusable."""
        try:
            return float(review.get("rating"))
        except (TypeError, ValueError):
            return None

    # Extract common words from positive and negative reviews
    def get_common_words(review_list):
        # "or ''" guards against bodies that are present but None.
        all_text = " ".join((r.get("body") or "").lower() for r in review_list)
        words = [w for w in all_text.split() if len(w) > 3]
        return Counter(words).most_common(15)

    positive, negative, neutral = [], [], []
    for review in reviews:
        rating = rating_of(review)
        if rating is None:
            continue
        if rating >= 4:
            positive.append(review)
        elif rating <= 2:
            negative.append(review)
        elif rating == 3:
            neutral.append(review)

    return {
        "total_reviews": len(reviews),
        "positive_count": len(positive),
        "negative_count": len(negative),
        "neutral_count": len(neutral),
        "positive_pct": round(len(positive) / len(reviews) * 100, 1) if reviews else 0,
        "positive_keywords": get_common_words(positive),
        "negative_keywords": get_common_words(negative),
    }
Production-Ready Trustpilot Scraping
For monitoring multiple businesses or building a comprehensive review database, the Trustpilot Scraper on Apify handles all the complexity — dynamic rendering, pagination, rate limiting, and structured data extraction.
For bypassing Trustpilot's anti-scraping protections at scale, ScraperAPI provides managed proxy rotation and browser rendering.
Data Export
import csv
def export_reviews(reviews, filename="trustpilot_reviews.csv"):
    """Write reviews to a UTF-8 CSV file with a header row.

    Args:
        reviews: List of review dicts. Nothing is written when it is empty.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        None.
    """
    if not reviews:
        return
    # Use the union of keys across all rows (first-seen order) so a row with
    # an extra field does not make DictWriter raise; rows lacking a field get
    # an empty cell.
    fieldnames = list(dict.fromkeys(key for review in reviews for key in review))
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(reviews)
    print(f"Exported {len(reviews)} reviews to {filename}")
Best Practices
- Use JSON-LD first: Trustpilot embeds structured data — much more reliable than HTML parsing
- Rate limit carefully: wait 2-3 seconds between page requests
- Use ScraperAPI for proxy rotation and rendering
- Handle pagination: Trustpilot shows about 20 reviews per page, so request multiple pages to collect more
- Monitor for changes: Trustpilot frequently updates their frontend
Conclusion
Trustpilot review data is powerful for reputation monitoring, competitive analysis, and market research. Whether you build custom scraping logic or use the Trustpilot Scraper on Apify, the insights from review data can drive real business decisions.
Happy scraping!
Top comments (0)