AliExpress is one of the world's largest e-commerce platforms, with millions of products from Chinese sellers. Extracting product data — prices, reviews, seller ratings — is invaluable for e-commerce intelligence, price monitoring, and market research.
In this guide, I'll walk you through building an AliExpress scraper with Python.
Use Cases for AliExpress Data
- Price comparison: Track product prices across sellers
- Dropshipping research: Find profitable products with good margins
- Competitor monitoring: Watch competitor pricing strategies
- Review sentiment analysis: Understand product quality from buyer feedback
- Supplier evaluation: Compare seller ratings and reliability
Setting Up
import json
import re
import time
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Accept-Language": "en-US,en;q=0.9",
}
# ScraperAPI handles proxy rotation and JS rendering
SCRAPER_API_KEY = "YOUR_KEY"
For handling AliExpress's anti-bot protections, ScraperAPI is excellent — it manages proxy rotation and browser rendering automatically.
Extracting Product Details
AliExpress heavily uses JavaScript rendering. Here's how to extract product data:
def scrape_product(product_url):
    """Extract details from an AliExpress product page.

    Args:
        product_url: Full URL of the AliExpress product page.

    Returns:
        dict of parsed fields (see parse_product_data); empty-ish defaults
        when the runParams payload cannot be located or parsed.
    """
    # URL-encode the target so its own query string is not consumed as
    # parameters of the ScraperAPI request itself.
    api_url = (
        f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}"
        f"&url={quote_plus(product_url)}&render=true"
    )
    response = requests.get(api_url, timeout=60)
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # AliExpress embeds product data as JSON assigned to window.runParams
    product_data = {}
    for script in soup.find_all("script"):
        if script.string and "window.runParams" in script.string:
            match = re.search(r'data:\s*(\{.*?\})\s*;', script.string, re.DOTALL)
            if match:
                try:
                    product_data = json.loads(match.group(1))
                    break  # stop at the first payload that parses cleanly
                except json.JSONDecodeError:
                    continue
    return parse_product_data(product_data)
def parse_product_data(data):
    """Parse structured product information.

    Pulls title, pricing, order/rating stats, and store details out of the
    runParams modules AliExpress embeds in its product pages. Missing
    modules or fields fall back to empty-string/zero defaults.
    """
    price = data.get("priceModule", {})
    title = data.get("titleModule", {})
    store = data.get("storeModule", {})
    feedback = data.get("feedbackModule", {})

    # Sale ("activity") price takes precedence; fall back to the list price.
    # Note: "formated" is AliExpress's own key spelling — do not "fix" it.
    sale_price = price.get("formatedActivityPrice", price.get("formatedPrice", ""))

    return {
        "title": title.get("subject", ""),
        "price": sale_price,
        "original_price": price.get("formatedPrice", ""),
        "currency": price.get("currencyCode", "USD"),
        "orders": title.get("tradeCount", 0),
        "rating": feedback.get("averageStar", 0),
        "reviews_count": feedback.get("totalValidNum", 0),
        "store_name": store.get("storeName", ""),
        "store_rating": store.get("positiveRate", ""),
        "store_followers": store.get("followingNumber", 0),
    }
Scraping Search Results
def scrape_search_results(query, pages=3):
    """Search AliExpress and extract product listings.

    Args:
        query: Search term; spaces and special characters are URL-encoded.
        pages: Number of result pages to fetch (default 3).

    Returns:
        List of dicts with title, price, orders, rating, and url keys.
    """
    all_products = []
    for page in range(1, pages + 1):
        # Encode the query ("wireless earbuds" contains a space) so the
        # search URL is well-formed.
        search_url = (
            f"https://www.aliexpress.com/wholesale"
            f"?SearchText={quote_plus(query)}&page={page}"
        )
        # Encode the whole target URL: otherwise its &page parameter is
        # consumed by the ScraperAPI request instead of AliExpress.
        api_url = (
            f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}"
            f"&url={quote_plus(search_url)}&render=true"
        )
        response = requests.get(api_url, timeout=60)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Product cards — class names change often, so match on substrings.
        items = soup.select("div[class*='search-item-card']")
        for item in items:
            title = item.select_one("h1, h3, [class*='title']")
            price = item.select_one("[class*='price']")
            orders = item.select_one("[class*='trade']")
            rating = item.select_one("[class*='evaluation']")
            link = item.select_one("a[href*='/item/']")
            all_products.append({
                "title": title.text.strip() if title else "",
                "price": price.text.strip() if price else "",
                "orders": orders.text.strip() if orders else "",
                "rating": rating.text.strip() if rating else "",
                # Listing hrefs are protocol-relative ("//...").
                "url": "https:" + link["href"] if link else "",
            })
        print(f"Page {page}: Found {len(items)} products")
        time.sleep(3)  # polite delay between pages to avoid rate limiting
    return all_products
Extracting Reviews
def scrape_reviews(product_id, pages=5):
    """Extract product reviews.

    Fetches up to `pages` pages from the AliExpress feedback endpoint and
    returns a list of dicts with rating, text, date, and country keys.
    """

    def _first_text(node, selector):
        # Stripped text of the first match for `selector`, or "" if absent.
        found = node.select_one(selector)
        return found.text.strip() if found else ""

    reviews = []
    for page in range(1, pages + 1):
        review_url = (
            f"https://feedback.aliexpress.com/display/productEvaluation.htm"
            f"?productId={product_id}&page={page}"
        )
        api_url = f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}&url={review_url}"
        response = requests.get(api_url, timeout=30)
        soup = BeautifulSoup(response.text, "html.parser")
        for item in soup.select(".feedback-item"):
            # Star rating = number of highlighted star elements.
            reviews.append({
                "rating": len(item.select(".star-view .star-active")),
                "text": _first_text(item, ".buyer-feedback span"),
                "date": _first_text(item, ".r-time"),
                "country": _first_text(item, ".user-country"),
            })
        time.sleep(2)
    return reviews
The Production-Ready Approach
Building a robust AliExpress scraper is challenging due to frequent page structure changes and aggressive anti-bot measures. For reliable, large-scale extraction, check out the AliExpress Scraper on Apify — it handles all the rendering, captcha solving, and data parsing out of the box.
Saving Data
import pandas as pd
def save_products(products, filename="aliexpress_products.csv"):
    """Write scraped products to a CSV file.

    Args:
        products: List of product dicts; their keys become CSV columns.
        filename: Output CSV path (default "aliexpress_products.csv").
    """
    df = pd.DataFrame(products)
    df.to_csv(filename, index=False)
    # Bug fix: report the actual destination file (original printed a
    # literal "(unknown)" placeholder instead of the filename).
    print(f"Saved {len(products)} products to {filename}")
# Example usage — guarded so the scrape only runs when executed as a
# script, not as a side effect of importing this module.
if __name__ == "__main__":
    products = scrape_search_results("wireless earbuds", pages=5)
    save_products(products)
Tips for Reliable AliExpress Scraping
- Use a rendering service: AliExpress relies heavily on JavaScript. ScraperAPI handles this automatically
- Rotate User-Agents: Vary your browser fingerprint
- Respect rate limits: 2-5 seconds between requests minimum
- Handle currency/locale: Set appropriate cookies for consistent pricing data
- Monitor for changes: AliExpress updates their frontend frequently
Conclusion
AliExpress product data extraction is powerful for e-commerce intelligence, but requires handling JavaScript rendering and anti-bot protections. Whether you build a custom solution or use a managed tool like the AliExpress Scraper, always scrape responsibly.
Happy data mining!
Top comments (0)