DEV Community

agenthustler
agenthustler

Posted on

How to Scrape Trustpilot in 2026: Reviews, Ratings, and Business Data

Trustpilot hosts over 300 million reviews for 1 million+ businesses. For brand monitoring, competitive intelligence, or market research — programmatic access to this data is invaluable.

This guide covers how to scrape Trustpilot reviews and business data in 2026 with Python, including working code examples and strategies for handling anti-bot protections.

What Trustpilot Data Can You Extract?

  • Reviews: text, star rating, date, author, reply from business
  • Business profiles: overall rating, total reviews, TrustScore, categories, location
  • Review statistics: rating distribution, review frequency over time
  • Reviewer profiles: number of reviews, location, verification status

Method 1: Scraping with Python + BeautifulSoup

Trustpilot renders most content server-side, making it straightforward to parse:

import requests
from bs4 import BeautifulSoup
import json
import time
import random

class TrustpilotScraper:
    """Scrape Trustpilot business profiles and reviews from server-rendered HTML."""

    # Seconds to wait for any single request; without this a stalled
    # connection hangs the scraper indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, proxy_url: str = None):
        """Create a session with browser-like headers.

        Args:
            proxy_url: Optional proxy URL applied to both HTTP and HTTPS traffic.
        """
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/125.0.0.0 Safari/537.36',
            'Accept-Language': 'en-US,en;q=0.9',
        })
        if proxy_url:
            self.session.proxies = {
                'http': proxy_url,
                'https': proxy_url,
            }

    def get_business_info(self, domain: str) -> dict:
        """Scrape the business profile from Trustpilot.

        Args:
            domain: Business domain as it appears in the Trustpilot URL.

        Returns:
            Dict with name/rating/review_count/url, or {} when the JSON-LD
            structured data is missing or unparseable (e.g. a bot challenge
            page was served instead of the profile).

        Raises:
            requests.HTTPError: On a non-2xx response.
        """
        url = f'https://www.trustpilot.com/review/{domain}'
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract JSON-LD structured data.
        script_tag = soup.find('script', type='application/ld+json')
        # script_tag.string is None when the tag is empty/fragmented.
        if not script_tag or not script_tag.string:
            return {}
        try:
            data = json.loads(script_tag.string)
        except json.JSONDecodeError:
            # Truncated or interstitial page — treat as "no data" like a
            # missing tag rather than crashing the caller.
            return {}

        # JSON-LD may be a single object or a list (@graph); pick the node
        # that carries the aggregate rating when several are present.
        if isinstance(data, list):
            data = next(
                (node for node in data
                 if isinstance(node, dict) and 'aggregateRating' in node),
                data[0] if data and isinstance(data[0], dict) else {},
            )
        if not isinstance(data, dict):
            return {}

        # `or {}` also covers an explicit "aggregateRating": null, which
        # would make `.get('aggregateRating', {})` return None and crash.
        rating = data.get('aggregateRating') or {}
        return {
            'name': data.get('name'),
            'rating': rating.get('ratingValue'),
            'review_count': rating.get('reviewCount'),
            'url': url,
        }

    def get_reviews(self, domain: str, pages: int = 5) -> list:
        """Scrape up to `pages` pages of reviews for a business.

        Stops early on any non-200 response (rate limited, blocked, or no
        such profile) and returns whatever was collected so far.
        """
        all_reviews = []

        for page in range(1, pages + 1):
            url = f'https://www.trustpilot.com/review/{domain}?page={page}'
            response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)

            if response.status_code != 200:
                break

            soup = BeautifulSoup(response.text, 'html.parser')
            review_cards = soup.find_all(
                'article', {'data-service-review-card-paper': 'true'}
            )

            if not review_cards:
                # Markup changes over time; fall back to a class-name match.
                review_cards = soup.find_all(
                    'div', class_=lambda c: c and 'reviewCard' in c
                )

            for card in review_cards:
                review = self._parse_review_card(card)
                if review:
                    all_reviews.append(review)

            # Polite delay between pages to avoid tripping rate limits.
            time.sleep(random.uniform(2, 4))

        return all_reviews

    def _parse_review_card(self, card) -> dict:
        """Extract fields from a single review card element.

        Every field is optional — the selectors reflect Trustpilot's current
        markup and any of them may fail to match. Returns None when nothing
        at all could be parsed so the caller can skip the card.
        """
        review = {}

        # Rating: parse the star image alt text, e.g. 'Rated 5 out of 5 stars'.
        star_elem = card.find('img', alt=lambda a: a and 'Rated' in str(a))
        if star_elem:
            parts = star_elem.get('alt', '').split()
            for i, part in enumerate(parts):
                if part == 'Rated' and i + 1 < len(parts):
                    try:
                        # float() first so a '4.5'-style value yields 4
                        # instead of being silently dropped by int().
                        review['rating'] = int(float(parts[i + 1]))
                    except ValueError:
                        pass
                    break  # only the first 'Rated ...' phrase is the rating

        # Review title
        title_elem = card.find('h2') or card.find(
            'a', {'data-review-title-typography': 'true'}
        )
        if title_elem:
            review['title'] = title_elem.get_text(strip=True)

        # Review text
        text_elem = card.find(
            'p', {'data-service-review-text-typography': 'true'}
        )
        if text_elem:
            review['text'] = text_elem.get_text(strip=True)

        # Author
        author_elem = card.find(
            'span', {'data-consumer-name-typography': 'true'}
        )
        if author_elem:
            review['author'] = author_elem.get_text(strip=True)

        # Date (ISO timestamp from the <time datetime="..."> attribute)
        time_elem = card.find('time')
        if time_elem:
            review['date'] = time_elem.get('datetime', '')

        return review if review else None


# Usage
scraper = TrustpilotScraper()

# Get business overview. get_business_info() returns {} when the page is
# blocked or the structured data is missing, so guard before indexing —
# business['name'] would otherwise raise KeyError.
business = scraper.get_business_info('amazon.com')
if business:
    print(f"{business['name']}: {business['rating']}/5 "
          f"({business['review_count']} reviews)")
else:
    print('Could not fetch business info (blocked or profile missing)')

# Get reviews
reviews = scraper.get_reviews('amazon.com', pages=3)
for r in reviews[:5]:
    print(f"  [{r.get('rating', '?')}★] {r.get('title', 'No title')}")
Enter fullscreen mode Exit fullscreen mode

Method 2: Using Trustpilot's Hidden API

Trustpilot's frontend calls internal API endpoints that return clean JSON:

import requests

def get_reviews_api(business_unit_id: str, page: int = 1) -> dict:
    """Fetch one page of reviews from Trustpilot's internal JSON API.

    Args:
        business_unit_id: Hex ID obtained via find_business_unit_id().
        page: 1-based page number.

    Returns:
        The decoded JSON payload.

    Raises:
        requests.HTTPError: On a non-2xx response (blocked, bad ID, ...);
            this is clearer than the JSON decode error that would otherwise
            surface when an HTML error page is returned.
    """
    url = (
        f'https://www.trustpilot.com/api/categoriespages/'
        f'{business_unit_id}/reviews'
    )
    params = {
        'locale': 'en-US',
        'page': page,
        'perPage': 20,
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36',
        'Accept': 'application/json',
    }

    # Timeout prevents a stalled connection from blocking the caller forever.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()


def find_business_unit_id(domain: str) -> str:
    """Find the Trustpilot business unit ID by scraping the review page.

    Args:
        domain: Business domain as it appears in the Trustpilot URL.

    Returns:
        The hex business unit ID, or '' when it cannot be located
        (page blocked or layout changed).
    """
    import re  # hoisted: the original imported this inside the per-script loop

    url = f'https://www.trustpilot.com/review/{domain}'
    response = requests.get(url, headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36'
    }, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # The ID is embedded in inline script data as "businessUnitId":"<hex>".
    # Compile once instead of re-importing/re-searching per script tag.
    pattern = re.compile(r'"businessUnitId":"([a-f0-9]+)"')
    for script in soup.find_all('script'):
        match = pattern.search(script.string or '')
        if match:
            return match.group(1)
    return ''
Enter fullscreen mode Exit fullscreen mode

Handling Anti-Bot Protection

Trustpilot uses Cloudflare and its own bot detection. Here's how to handle it:

Proxy Rotation

Residential proxies are essential for any volume:

# Using ThorData residential proxies
# Sign up: https://affiliate.thordata.com/0a0x4nzu7tvv
proxy_url = 'http://user:pass@proxy.thordata.com:9090'
scraper = TrustpilotScraper(proxy_url=proxy_url)
Enter fullscreen mode Exit fullscreen mode

Smart Headers with ScrapeOps

ScrapeOps provides fake browser headers and proxy aggregation to improve success rates:

import requests

SCRAPEOPS_KEY = 'YOUR_KEY'

def get_scrapeops_headers() -> dict:
    """Get realistic browser headers from the ScrapeOps headers API.

    Returns:
        A single header dict, or {} when the API returns no results so
        callers can fall back to their own headers.
    """
    response = requests.get(
        'https://headers.scrapeops.io/v1/browser-headers',
        params={'api_key': SCRAPEOPS_KEY, 'num_results': 1},
        timeout=30,  # don't hang the scraper if the headers API is down
    )
    headers_list = response.json().get('result', [])
    return headers_list[0] if headers_list else {}


def scrape_with_scrapeops(url: str) -> str:
    """Fetch a URL through the ScrapeOps proxy aggregator and return the HTML.

    Args:
        url: Target page URL.

    Raises:
        requests.HTTPError: On quota/auth/upstream failures — the original
            silently returned the error body as if it were scraped HTML.
    """
    response = requests.get(
        'https://proxy.scrapeops.io/v1/',
        params={
            'api_key': SCRAPEOPS_KEY,
            'url': url,
            'render_js': 'false',
        },
        # Aggregators retry upstream providers, so allow a generous but
        # still bounded timeout.
        timeout=120,
    )
    response.raise_for_status()
    return response.text
Enter fullscreen mode Exit fullscreen mode

Method 3: Using a Managed Scraper

For production use cases where you need reliable, maintained infrastructure:

Trustpilot Scraper on Apify handles proxy rotation, CAPTCHA solving, and anti-bot bypassing out of the box. You provide business URLs, it returns structured review data in JSON, CSV, or Excel format.

This is the fastest path from zero to data if you don't want to maintain scraping infrastructure.

Use Cases for Trustpilot Data

Brand Monitoring

Track your company's review trends over time:

from collections import Counter
from datetime import datetime

def analyze_sentiment_trend(reviews: list) -> dict:
    """Summarise a review list: rating histogram plus overall and monthly averages.

    Reviews without a 'rating' key count as rating 0; reviews without a
    'date' are excluded from the monthly breakdown.
    """
    all_ratings = [r.get('rating', 0) for r in reviews]

    # Bucket ratings by month, keyed on the YYYY-MM prefix of the ISO date.
    by_month = {}
    for r in reviews:
        when = r.get('date', '')
        if when:
            by_month.setdefault(when[:7], []).append(r.get('rating', 0))

    per_month_avg = {
        m: sum(vals) / len(vals)
        for m, vals in sorted(by_month.items())
    }

    overall = sum(all_ratings) / len(all_ratings) if all_ratings else 0

    return {
        'total_reviews': len(reviews),
        'rating_distribution': dict(Counter(all_ratings)),
        'average_rating': overall,
        'monthly_averages': per_month_avg,
    }

# Pull ten pages of your own reviews and print the headline numbers.
company_reviews = scraper.get_reviews('yourcompany.com', pages=10)
summary = analyze_sentiment_trend(company_reviews)
print(f"Average: {summary['average_rating']:.1f}/5")
print(f"Distribution: {summary['rating_distribution']}")
Enter fullscreen mode Exit fullscreen mode

Competitive Intelligence

def compare_competitors(domains: list) -> list:
    """Compare Trustpilot scores across competitor domains.

    Args:
        domains: Business domains to look up.

    Returns:
        One dict per domain ({'domain', 'rating', 'reviews'}), sorted
        best-rated first. Domains whose rating could not be fetched sort
        last instead of crashing the comparison.
    """
    scraper = TrustpilotScraper()
    results = []

    for domain in domains:
        info = scraper.get_business_info(domain)
        results.append({
            'domain': domain,
            'rating': info.get('rating', 'N/A'),
            'reviews': info.get('review_count', 0),
        })
        # Polite delay so sequential profile fetches don't look like a bot burst.
        time.sleep(random.uniform(3, 6))

    def _sort_key(row):
        # Ratings arrive as strings ('4.7') or the 'N/A' default above;
        # float('N/A') raised ValueError in the original sort.
        try:
            return float(row['rating'] or 0)
        except (TypeError, ValueError):
            return 0.0

    results.sort(key=_sort_key, reverse=True)
    return results


# Rank a set of e-commerce platforms by their Trustpilot score.
platforms = ['shopify.com', 'woocommerce.com', 'bigcommerce.com']
for entry in compare_competitors(platforms):
    print(f"{entry['domain']}: {entry['rating']}/5 ({entry['reviews']} reviews)")
Enter fullscreen mode Exit fullscreen mode

Storing Results

import csv
import json

def export_reviews(reviews: list, domain: str):
    """Export reviews to CSV and JSON files in the working directory.

    Args:
        reviews: Review dicts; keys may differ between reviews since the
            scraper emits only the fields it could parse.
        domain: Business domain; dots become underscores in the filenames.
    """
    stem = f'trustpilot_{domain.replace(".", "_")}'

    # CSV — fieldnames must cover *every* key that appears in any review.
    # Using only reviews[0].keys() made DictWriter.writerows() raise
    # ValueError as soon as a later review carried an extra field.
    csv_file = f'{stem}.csv'
    if reviews:
        fieldnames = []
        for review in reviews:
            for key in review:
                if key not in fieldnames:
                    fieldnames.append(key)  # preserve first-seen column order
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
            writer.writeheader()
            writer.writerows(reviews)

    # JSON
    json_file = f'{stem}.json'
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(reviews, f, indent=2, ensure_ascii=False)

    print(f'Saved {len(reviews)} reviews to {csv_file} and {json_file}')
Enter fullscreen mode Exit fullscreen mode

Ethical Scraping Guidelines

  • Respect rate limits: Add delays between requests (2-5 seconds minimum)
  • Check robots.txt: verify which paths Trustpilot's robots.txt currently permits before scraping — its rules can change at any time
  • Don't scrape personal data beyond what's publicly visible
  • Comply with GDPR/CCPA when processing reviewer data in EU/California
  • Cache results: Don't re-scrape data you already have

Wrapping Up

Trustpilot scraping in 2026 comes down to three approaches:

  1. Quick and simple: BeautifulSoup + requests with a residential proxy
  2. Smarter scraping: Use ScrapeOps for header rotation and proxy aggregation
  3. Production-ready: Use a managed Trustpilot scraper for reliability at scale

Start with the Python examples above, and scale up to managed solutions when your volume demands it. The key is respecting the platform while getting the data you need.

Top comments (0)