How to Scrape Alibaba and AliExpress for Product Sourcing

#python #tutorial #webdev #programming

Alibaba (B2B, wholesale) and AliExpress (B2C, retail) are among the world's largest online marketplaces. Scraping their product data helps e-commerce entrepreneurs find suppliers, compare prices, and identify profitable products to sell.

Why Scrape These Platforms?

Find wholesale suppliers for your products
Compare pricing across thousands of sellers
Track product trends and seasonal demand
Build automated sourcing pipelines

Setting Up

pip install requests beautifulsoup4 pandas

Scraping AliExpress Listings

import requests
from bs4 import BeautifulSoup

def scrape_aliexpress(keyword, pages=3, timeout=10):
    """Scrape AliExpress search results for ``keyword``.

    Args:
        keyword: Search phrase. It is URL-encoded automatically, so spaces
            and characters such as ``&`` are safe to pass.
        pages: Number of result pages to fetch, starting at page 1.
        timeout: Seconds to wait for each HTTP response before ``requests``
            raises a timeout error instead of hanging indefinitely.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``orders`` and
        ``source``. ``price`` falls back to ``"N/A"`` and ``orders`` to
        ``"0"`` when the matching element is missing from a result card.
    """
    search_url = "https://www.aliexpress.com/wholesale"
    # The headers are identical for every page, so build them once.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }
    products = []

    for page in range(1, pages + 1):
        # Passing the query through ``params`` lets requests percent-encode
        # the keyword; interpolating it into the URL would send raw spaces
        # and let "&" or "#" in the keyword corrupt the query string.
        response = requests.get(
            search_url,
            params={"SearchText": keyword, "page": page},
            headers=headers,
            timeout=timeout,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        # Class names on the site are matched by substring because only a
        # fragment of each class name is relied upon here.
        for item in soup.select("[class*=SearchResult]"):
            title = item.select_one("[class*=title]")
            price = item.select_one("[class*=price]")
            orders = item.select_one("[class*=trade]")

            # A card without a title is not treated as a product.
            if title:
                products.append({
                    "title": title.text.strip(),
                    "price": price.text.strip() if price else "N/A",
                    "orders": orders.text.strip() if orders else "0",
                    "source": "aliexpress",
                })

    return products

# Example run: search for wireless earbuds and report how many listings
# were parsed from the fetched result pages.
results = scrape_aliexpress(keyword="wireless earbuds")
print(f"Found {len(results)} products")

Handling Dynamic Content

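Both platforms render much of their listing data with JavaScript, so a plain requests call may return HTML that does not yet contain the product cards. A rendering service such as ScraperAPI executes the JavaScript for you and returns the final HTML: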

def scrape_with_rendering(keyword, api_key="YOUR_SCRAPERAPI_KEY", timeout=70):
    """Scrape AliExpress search results through ScraperAPI with rendering on.

    Args:
        keyword: Search phrase. It is URL-encoded before being embedded in
            the target URL.
        api_key: ScraperAPI key. The default is a placeholder and must be
            replaced with a real key for the request to succeed.
        timeout: Seconds to wait for the response. Rendered requests can
            take far longer than a plain fetch, hence the generous default.

    Returns:
        A list of dicts with the keys ``title`` and ``price``. ``price``
        falls back to ``"N/A"`` when no price element is found.
    """
    from urllib.parse import quote_plus

    # Encode the keyword inside the *target* URL. requests only encodes the
    # outer ``url`` parameter as a whole, so a raw "&" or "#" in the keyword
    # would otherwise truncate or alter the search that gets forwarded.
    target_url = (
        f"https://www.aliexpress.com/wholesale?SearchText={quote_plus(keyword)}"
    )
    params = {
        "api_key": api_key,
        "url": target_url,
        "render": "true",
    }
    response = requests.get(
        "https://api.scraperapi.com", params=params, timeout=timeout
    )
    soup = BeautifulSoup(response.text, "html.parser")

    products = []
    for item in soup.select("[class*=product]"):
        title = item.select_one("h1, h3, [class*=title]")
        price = item.select_one("[class*=price]")
        # Elements without a title are skipped rather than stored.
        if title:
            products.append({
                "title": title.text.strip(),
                "price": price.text.strip() if price else "N/A",
            })
    return products

Scraping Alibaba B2B Listings

def scrape_alibaba(keyword, pages=2, timeout=10):
    """Scrape Alibaba B2B search results for ``keyword``.

    Args:
        keyword: Search phrase. It is URL-encoded automatically, so spaces
            and characters such as ``&`` are safe to pass.
        pages: Number of result pages to fetch, starting at page 1.
        timeout: Seconds to wait for each HTTP response before ``requests``
            raises a timeout error instead of hanging indefinitely.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``moq`` and
        ``source``. ``price`` and ``moq`` fall back to ``"N/A"`` when the
        matching element is missing from an offer card.
    """
    search_url = "https://www.alibaba.com/trade/search"
    # The headers are identical for every page, so build them once.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    }
    products = []

    for page in range(1, pages + 1):
        # Passing the query through ``params`` lets requests percent-encode
        # the keyword; interpolating it into the URL would send raw spaces
        # and let "&" or "#" in the keyword corrupt the query string.
        response = requests.get(
            search_url,
            params={"SearchText": keyword, "page": page},
            headers=headers,
            timeout=timeout,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        for item in soup.select(".organic-list-offer"):
            title = item.select_one(".organic-list-offer__title")
            price = item.select_one(".organic-list-offer__price")
            moq = item.select_one(".organic-list-offer__moq")

            # An offer without a title is not treated as a product.
            if title:
                products.append({
                    "title": title.text.strip(),
                    "price": price.text.strip() if price else "N/A",
                    "moq": moq.text.strip() if moq else "N/A",
                    "source": "alibaba",
                })

    return products

Price Comparison

import pandas as pd
import re

# First alternative: a number with thousands separators ("1,299.00").
# Second alternative: a plain integer or decimal ("12.5").
_PRICE_PATTERN = re.compile(
    r"\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d+(?:\.\d+)?"
)


def _extract_price(price_str):
    """Return the first number found in ``price_str`` as a float, else None.

    For a range such as ``"$1.20 - $3.50"`` the lower bound is returned.
    """
    match = _PRICE_PATTERN.search(str(price_str))
    if match is None:
        return None
    # Drop thousands separators so float() accepts the matched text.
    return float(match.group().replace(",", ""))


def compare_prices(keyword):
    """Scrape both marketplaces for ``keyword`` and compare average prices.

    Prints the mean parsed price per source and returns the combined data.

    Args:
        keyword: Search phrase passed to both scrapers.

    Returns:
        A ``pandas.DataFrame`` with one row per scraped product. When at
        least one product was found it also has a float ``price_usd``
        column, which is NaN where no number could be parsed. The column
        name comes from the original code; no currency conversion is done.
    """
    ali_products = scrape_aliexpress(keyword)
    baba_products = scrape_alibaba(keyword)

    df = pd.DataFrame(ali_products + baba_products)

    # With no scraped rows there is no "price" column to read, so return
    # the empty frame instead of failing with a KeyError.
    if df.empty:
        print("No products found.")
        return df

    # Cast to float so unparseable prices become NaN and are skipped by
    # mean(), even when every price in the column failed to parse.
    df["price_usd"] = df["price"].apply(_extract_price).astype(float)
    print("Average price by source:")
    print(df.groupby("source")["price_usd"].mean())
    return df

Product Trend Tracking

import sqlite3
from datetime import datetime

def track_product(keyword, db_path="sourcing.db"):
    """Record today's AliExpress listings for ``keyword`` in SQLite.

    Creates the ``products`` table if it does not exist, scrapes the first
    result page, and inserts one row per product stamped with the current
    local date.

    Args:
        keyword: Search phrase to scrape and to store with each row.
        db_path: Path of the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    # try/finally guarantees the connection is closed even when scraping or
    # an insert raises; otherwise the handle would leak.
    try:
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS products (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                keyword TEXT, title TEXT,
                price TEXT, source TEXT, date TEXT
            )
        """)
        products = scrape_aliexpress(keyword, pages=1)

        # Compute the date once so every row of this run carries the same
        # value, even if the run straddles midnight.
        today = datetime.now().strftime("%Y-%m-%d")
        rows = [
            (keyword, p["title"], p["price"], p["source"], today)
            for p in products
        ]
        cursor.executemany(
            "INSERT INTO products (keyword, title, price, source, date) VALUES (?, ?, ?, ?, ?)",
            rows,
        )
        conn.commit()
    finally:
        conn.close()

Scaling

For large-scale sourcing, use ThorData for residential proxies or ScrapeOps to manage and monitor your scraping infrastructure.

Conclusion

Scraping Alibaba and AliExpress automates product sourcing. Compare prices, track trends, and find profitable products systematically.

Happy sourcing! Follow for more e-commerce automation tutorials.

DEV Community