Trustpilot hosts over 300 million reviews for 1 million+ businesses. For brand monitoring, competitive intelligence, or market research — programmatic access to this data is invaluable.
This guide covers how to scrape Trustpilot reviews and business data in 2026 with Python, including working code examples and strategies for handling anti-bot protections.
What Trustpilot Data Can You Extract?
- Reviews: text, star rating, date, author, reply from business
- Business profiles: overall rating, total reviews, TrustScore, categories, location
- Review statistics: rating distribution, review frequency over time
- Reviewer profiles: number of reviews, location, verification status
Method 1: Scraping with Python + BeautifulSoup
Trustpilot renders most content server-side, making it straightforward to parse:
import requests
from bs4 import BeautifulSoup
import json
import time
import random
class TrustpilotScraper:
    """Scrapes Trustpilot business profiles and review pages over HTTP.

    Keeps a persistent ``requests.Session`` with browser-like headers
    (reduces the chance of being served a bot-challenge page) and can
    optionally route all traffic through a proxy.
    """

    # requests has NO default timeout; without one a blocked connection
    # hangs forever. Applied to every request this class makes.
    REQUEST_TIMEOUT = 30

    def __init__(self, proxy_url: str | None = None):
        """Create the session; route it through *proxy_url* if given."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/125.0.0.0 Safari/537.36',
            'Accept-Language': 'en-US,en;q=0.9',
        })
        if proxy_url:
            self.session.proxies = {
                'http': proxy_url,
                'https': proxy_url,
            }

    def get_business_info(self, domain: str) -> dict:
        """Scrape a business profile (name, rating, review count).

        Returns an empty dict when the JSON-LD block is missing or
        unparseable. Raises ``requests.HTTPError`` on a bad status.
        """
        url = f'https://www.trustpilot.com/review/{domain}'
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Trustpilot embeds machine-readable JSON-LD structured data.
        script_tag = soup.find('script', type='application/ld+json')
        if script_tag and script_tag.string:
            try:
                data = json.loads(script_tag.string)
            except json.JSONDecodeError:
                return {}
            # JSON-LD may also be a list at top level; only a dict is usable.
            if not isinstance(data, dict):
                return {}
            aggregate = data.get('aggregateRating', {}) or {}
            return {
                'name': data.get('name'),
                'rating': aggregate.get('ratingValue'),
                'review_count': aggregate.get('reviewCount'),
                'url': url,
            }
        return {}

    def get_reviews(self, domain: str, pages: int = 5) -> list:
        """Scrape up to *pages* pages of reviews for *domain*."""
        all_reviews = []
        for page in range(1, pages + 1):
            url = f'https://www.trustpilot.com/review/{domain}?page={page}'
            response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                # Past the last page, or blocked — stop paginating.
                break
            soup = BeautifulSoup(response.text, 'html.parser')
            review_cards = soup.find_all(
                'article', {'data-service-review-card-paper': 'true'}
            )
            if not review_cards:
                # Markup changes periodically; fall back to a looser match.
                review_cards = soup.find_all(
                    'div', class_=lambda c: c and 'reviewCard' in c
                )
            for card in review_cards:
                review = self._parse_review_card(card)
                if review:
                    all_reviews.append(review)
            # Polite delay between pages to avoid rate limiting.
            time.sleep(random.uniform(2, 4))
        return all_reviews

    def _parse_review_card(self, card) -> dict | None:
        """Extract rating/title/text/author/date from one review card.

        Every field is optional; returns None when nothing was found.
        """
        review = {}
        # Rating is encoded in the star image's alt text,
        # e.g. 'Rated 5 out of 5 stars'.
        star_elem = card.find('img', alt=lambda a: a and 'Rated' in str(a))
        if star_elem:
            alt = star_elem.get('alt', '')
            parts = alt.split()
            for i, part in enumerate(parts):
                if part == 'Rated' and i + 1 < len(parts):
                    try:
                        review['rating'] = int(parts[i + 1])
                    except ValueError:
                        pass
        # Review title
        title_elem = card.find('h2') or card.find(
            'a', {'data-review-title-typography': 'true'}
        )
        if title_elem:
            review['title'] = title_elem.get_text(strip=True)
        # Review body text
        text_elem = card.find(
            'p', {'data-service-review-text-typography': 'true'}
        )
        if text_elem:
            review['text'] = text_elem.get_text(strip=True)
        # Author display name
        author_elem = card.find(
            'span', {'data-consumer-name-typography': 'true'}
        )
        if author_elem:
            review['author'] = author_elem.get_text(strip=True)
        # Machine-readable ISO date from the <time> element
        time_elem = card.find('time')
        if time_elem:
            review['date'] = time_elem.get('datetime', '')
        return review if review else None
# Usage example: fetch a business overview, then its first few reviews.
scraper = TrustpilotScraper()
# Business overview comes from the page's JSON-LD structured data.
business = scraper.get_business_info('amazon.com')
print(f"{business['name']}: {business['rating']}/5 ({business['review_count']} reviews)")
# Scrape the first three pages of reviews (20 reviews per page).
reviews = scraper.get_reviews('amazon.com', pages=3)
for r in reviews[:5]:
    print(f" [{r.get('rating', '?')}★] {r.get('title', 'No title')}")
Method 2: Using Trustpilot's Hidden API
Trustpilot's frontend calls internal API endpoints that return clean JSON:
import requests
def get_reviews_api(business_unit_id: str, page: int = 1) -> dict:
    """Fetch one page of reviews from Trustpilot's internal JSON API.

    Raises ``requests.HTTPError`` on a non-2xx response rather than
    trying to decode an error page as JSON.
    """
    url = (
        f'https://www.trustpilot.com/api/categoriespages/'
        f'{business_unit_id}/reviews'
    )
    params = {
        'locale': 'en-US',
        'page': page,
        'perPage': 20,
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36',
        'Accept': 'application/json',
    }
    # Timeout prevents hanging forever on a blocked/slow connection.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()
def find_business_unit_id(domain: str) -> str:
    """Find a business unit ID by scraping its Trustpilot review page.

    Returns the hex ID string, or '' when it cannot be located.
    """
    import re  # local import so this snippet stays self-contained

    url = f'https://www.trustpilot.com/review/{domain}'
    response = requests.get(url, headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36'
    }, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    # The ID is embedded in inline script data. Compile the pattern once,
    # outside the loop, instead of re-importing/searching per script tag.
    pattern = re.compile(r'"businessUnitId":"([a-f0-9]+)"')
    for script in soup.find_all('script'):
        text = script.string or ''
        if 'businessUnitId' in text:
            match = pattern.search(text)
            if match:
                return match.group(1)
    return ''
Handling Anti-Bot Protection
Trustpilot uses Cloudflare and its own bot detection. Here's how to handle it:
Proxy Rotation
Residential proxies are essential for any volume:
# Route all scraper traffic through a residential proxy (ThorData shown).
# Sign up: https://affiliate.thordata.com/0a0x4nzu7tvv
# Replace user:pass with your own proxy credentials before running.
proxy_url = 'http://user:pass@proxy.thordata.com:9090'
scraper = TrustpilotScraper(proxy_url=proxy_url)
Smart Headers with ScrapeOps
ScrapeOps provides fake browser headers and proxy aggregation to improve success rates:
import requests
SCRAPEOPS_KEY = 'YOUR_KEY'
def get_scrapeops_headers() -> dict:
    """Fetch one set of realistic browser headers from ScrapeOps.

    Returns an empty dict when the API yields no results.
    """
    # Timeout prevents hanging forever on a blocked/slow connection.
    response = requests.get(
        'https://headers.scrapeops.io/v1/browser-headers',
        params={'api_key': SCRAPEOPS_KEY, 'num_results': 1},
        timeout=30,
    )
    # Response shape: {'result': [header_dict, ...]} — take the first set.
    headers_list = response.json().get('result', [])
    return headers_list[0] if headers_list else {}
def scrape_with_scrapeops(url: str) -> str:
    """Fetch *url* through the ScrapeOps proxy aggregator; returns HTML."""
    response = requests.get(
        'https://proxy.scrapeops.io/v1/',
        params={
            'api_key': SCRAPEOPS_KEY,
            'url': url,
            'render_js': 'false',
        },
        # Aggregators retry upstream proxies, so allow a generous window —
        # but never the requests default of no timeout at all.
        timeout=120,
    )
    return response.text
Method 3: Using a Managed Scraper
For production use cases where you need reliable, maintained infrastructure:
Trustpilot Scraper on Apify handles proxy rotation, CAPTCHA solving, and anti-bot bypassing out of the box. You provide business URLs, it returns structured review data in JSON, CSV, or Excel format.
This is the fastest path from zero to data if you don't want to maintain scraping infrastructure.
Use Cases for Trustpilot Data
Brand Monitoring
Track your company's review trends over time:
from collections import Counter
from datetime import datetime
def analyze_sentiment_trend(reviews: list) -> dict:
    """Summarise reviews: rating distribution, overall and per-month averages.

    Reviews missing a 'rating' count as 0; reviews missing a 'date' are
    excluded from the monthly breakdown only.
    """
    distribution = Counter(entry.get('rating', 0) for entry in reviews)

    # Group ratings by 'YYYY-MM', sliced from each review's ISO date.
    monthly: dict = {}
    for entry in reviews:
        stamp = entry.get('date', '')
        if stamp:
            monthly.setdefault(stamp[:7], []).append(entry.get('rating', 0))

    monthly_avg = {
        month: sum(values) / len(values)
        for month, values in sorted(monthly.items())
    }

    total = len(reviews)
    if total:
        overall = sum(entry.get('rating', 0) for entry in reviews) / total
    else:
        overall = 0

    return {
        'total_reviews': total,
        'rating_distribution': dict(distribution),
        'average_rating': overall,
        'monthly_averages': monthly_avg,
    }
# Pull ten pages of your own company's reviews and summarise the trends.
reviews = scraper.get_reviews('yourcompany.com', pages=10)
trends = analyze_sentiment_trend(reviews)
print(f"Average: {trends['average_rating']:.1f}/5")
print(f"Distribution: {trends['rating_distribution']}")
Competitive Intelligence
def compare_competitors(domains: list) -> list:
    """Compare Trustpilot scores across competitor *domains*.

    Returns one dict per domain ({'domain', 'rating', 'reviews'}),
    sorted by rating, highest first. Domains whose rating could not
    be scraped sort last.
    """
    def _rating_key(entry: dict) -> float:
        # 'rating' is the string 'N/A' when the profile lookup failed,
        # and may be None or a numeric string otherwise. The original
        # float(x['rating'] or 0) raised ValueError on 'N/A'.
        try:
            return float(entry['rating'])
        except (TypeError, ValueError):
            return 0.0

    scraper = TrustpilotScraper()
    results = []
    for domain in domains:
        info = scraper.get_business_info(domain)
        results.append({
            'domain': domain,
            'rating': info.get('rating', 'N/A'),
            'reviews': info.get('review_count', 0),
        })
        # Polite delay between profile fetches.
        time.sleep(random.uniform(3, 6))
    results.sort(key=_rating_key, reverse=True)
    return results
# Rank a set of competitor domains by their Trustpilot rating.
competitors = ['shopify.com', 'woocommerce.com', 'bigcommerce.com']
ranking = compare_competitors(competitors)
for r in ranking:
    print(f"{r['domain']}: {r['rating']}/5 ({r['reviews']} reviews)")
Storing Results
import csv
import json
def export_reviews(reviews: list, domain: str):
    """Export *reviews* to CSV and JSON files named after *domain*.

    Writes ``trustpilot_<domain>.csv`` (skipped when *reviews* is empty)
    and ``trustpilot_<domain>.json`` in the working directory.
    """
    stem = f'trustpilot_{domain.replace(".", "_")}'
    # CSV — reviews can have differing keys (every parsed field is
    # optional), so collect the union of keys in first-seen order.
    # Using reviews[0].keys() alone makes DictWriter raise ValueError
    # on any later row that has an extra field.
    csv_file = f'{stem}.csv'
    if reviews:
        fieldnames: list = []
        for review in reviews:
            for key in review:
                if key not in fieldnames:
                    fieldnames.append(key)
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            # restval='' leaves a blank cell for rows missing a field.
            writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
            writer.writeheader()
            writer.writerows(reviews)
    # JSON mirrors the scraped structure exactly.
    json_file = f'{stem}.json'
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(reviews, f, indent=2, ensure_ascii=False)
    print(f'Saved {len(reviews)} reviews to {csv_file} and {json_file}')
Ethical Scraping Guidelines
- Respect rate limits: Add delays between requests (2-5 seconds minimum)
- Check robots.txt: review Trustpilot's current robots.txt and terms of service, and honor their directives — they change over time
- Don't scrape personal data beyond what's publicly visible
- Comply with GDPR/CCPA when processing reviewer data in EU/California
- Cache results: Don't re-scrape data you already have
Wrapping Up
Trustpilot scraping in 2026 comes down to three approaches:
- Quick and simple: BeautifulSoup + requests with a residential proxy
- Smarter scraping: Use ScrapeOps for header rotation and proxy aggregation
- Production-ready: Use a managed Trustpilot scraper for reliability at scale
Start with the Python examples above, and scale up to managed solutions when your volume demands it. The key is respecting the platform while getting the data you need.
Top comments (0)