DEV Community

agenthustler
agenthustler

Posted on

How to Scrape Alibaba and AliExpress for Product Sourcing

Alibaba (B2B, wholesale) and AliExpress (B2C, retail) are among the world's largest online marketplaces. Scraping their product data helps e-commerce entrepreneurs find suppliers, compare prices, and identify profitable products to sell.

Why Scrape These Platforms?

  • Find wholesale suppliers for your products
  • Compare pricing across thousands of sellers
  • Track product trends and seasonal demand
  • Build automated sourcing pipelines

Setting Up

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

Scraping AliExpress Listings

import requests
from bs4 import BeautifulSoup

def scrape_aliexpress(keyword, pages=3):
    """Scrape AliExpress search results for *keyword*.

    Args:
        keyword: Search phrase. It is sent as a query parameter so that
            spaces and reserved characters (``&``, ``#``, ...) are
            URL-encoded instead of corrupting the query string.
        pages: Number of result pages to request, starting at page 1.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``orders`` and
        ``source`` (always ``"aliexpress"``). A missing price element is
        reported as ``"N/A"`` and a missing order count as ``"0"``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    # Loop-invariant, so build it once.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }
    products = []

    for page in range(1, pages + 1):
        response = requests.get(
            "https://www.aliexpress.com/wholesale",
            params={"SearchText": keyword, "page": page},
            headers=headers,
            # Without a timeout, requests can block forever on a stalled
            # connection.
            timeout=30,
        )
        # Surface blocks/errors instead of silently parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # The selectors are substring matches on the class attribute.
        for item in soup.select("[class*=SearchResult]"):
            title = item.select_one("[class*=title]")
            if not title:
                continue  # no title element: not treated as a product

            price = item.select_one("[class*=price]")
            orders = item.select_one("[class*=trade]")
            products.append({
                "title": title.text.strip(),
                "price": price.text.strip() if price else "N/A",
                "orders": orders.text.strip() if orders else "0",
                "source": "aliexpress",
            })

    return products

# Example run: search with the default page count and report how many
# products were collected.
results = scrape_aliexpress("wireless earbuds")
print(f"Found {len(results)} products")
Enter fullscreen mode Exit fullscreen mode

Handling Dynamic Content

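Both platforms render much of their listing data with JavaScript, so a plain requests call may return HTML that does not yet contain the product cards. A rendering service such as ScraperAPI can execute the JavaScript for you when you pass render=true: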

def scrape_with_rendering(keyword, api_key="YOUR_SCRAPERAPI_KEY"):
    """Fetch an AliExpress search page through ScraperAPI with rendering on.

    Args:
        keyword: Search phrase; URL-encoded before being embedded in the
            target URL so spaces and ``&`` cannot alter the query.
        api_key: ScraperAPI key. Defaults to the placeholder string so
            existing one-argument calls keep working.

    Returns:
        A list of dicts with the keys ``title`` and ``price``; a missing
        price element is reported as ``"N/A"``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response from the API.
    """
    from urllib.parse import urlencode

    target_url = "https://www.aliexpress.com/wholesale?" + urlencode(
        {"SearchText": keyword}
    )
    params = {
        "api_key": api_key,
        "url": target_url,
        "render": "true",
    }
    # Rendered requests are slow; a generous but finite timeout keeps a
    # stalled request from hanging the caller indefinitely.
    response = requests.get("https://api.scraperapi.com", params=params, timeout=70)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    products = []
    for item in soup.select("[class*=product]"):
        title = item.select_one("h1, h3, [class*=title]")
        if not title:
            continue  # no title element: not treated as a product

        price = item.select_one("[class*=price]")
        products.append({
            "title": title.text.strip(),
            "price": price.text.strip() if price else "N/A",
        })
    return products
Enter fullscreen mode Exit fullscreen mode

Scraping Alibaba B2B Listings

def scrape_alibaba(keyword, pages=2):
    """Scrape Alibaba B2B search results for *keyword*.

    Args:
        keyword: Search phrase. It is sent as a query parameter so that
            spaces and reserved characters (``&``, ``#``, ...) are
            URL-encoded instead of corrupting the query string.
        pages: Number of result pages to request, starting at page 1.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``moq`` and
        ``source`` (always ``"alibaba"``). Missing price or MOQ elements
        are reported as ``"N/A"``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    # Loop-invariant, so build it once.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    }
    products = []

    for page in range(1, pages + 1):
        response = requests.get(
            "https://www.alibaba.com/trade/search",
            params={"SearchText": keyword, "page": page},
            headers=headers,
            # Without a timeout, requests can block forever on a stalled
            # connection.
            timeout=30,
        )
        # Surface blocks/errors instead of silently parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        for item in soup.select(".organic-list-offer"):
            title = item.select_one(".organic-list-offer__title")
            if not title:
                continue  # no title element: not treated as a product

            price = item.select_one(".organic-list-offer__price")
            moq = item.select_one(".organic-list-offer__moq")
            products.append({
                "title": title.text.strip(),
                "price": price.text.strip() if price else "N/A",
                "moq": moq.text.strip() if moq else "N/A",
                "source": "alibaba",
            })

    return products
Enter fullscreen mode Exit fullscreen mode

Price Comparison

import pandas as pd
import re

def compare_prices(keyword):
    """Scrape both marketplaces for *keyword* and compare average prices.

    Prints the mean parsed price per ``source`` and returns the combined
    data.

    Args:
        keyword: Search phrase forwarded to ``scrape_aliexpress`` and
            ``scrape_alibaba``.

    Returns:
        A ``pandas.DataFrame`` holding every scraped product plus a float
        ``price_usd`` column. The column holds the first number found in
        the ``price`` text (for a range such as "1.20 - 3.50" that is the
        lower bound) or NaN when no number is present. If nothing was
        scraped, an empty frame with the ``title``, ``price``, ``source``
        and ``price_usd`` columns is returned.
    """
    all_products = scrape_aliexpress(keyword) + scrape_alibaba(keyword)
    df = pd.DataFrame(all_products)
    if df.empty:
        # An empty scrape yields a frame with no columns; create the ones
        # used below so the lookup and groupby do not raise KeyError.
        df = pd.DataFrame(columns=["title", "price", "source"])

    # First alternative: comma-grouped thousands ("1,234.56"). A comma is
    # only treated as a separator when followed by exactly three digits.
    # Second: plain integer/decimal. Third: a bare fraction like ".99".
    price_pattern = re.compile(
        r"\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?|\.\d+"
    )

    def extract_price(price_str):
        """Return the first number in *price_str* as a float, else NaN."""
        match = price_pattern.search(str(price_str))
        if match is None:
            return float("nan")
        return float(match.group().replace(",", ""))

    # Force a float dtype so mean() works even when the frame is empty or
    # no price could be parsed.
    df["price_usd"] = df["price"].map(extract_price).astype("float64")
    print("Average price by source:")
    print(df.groupby("source")["price_usd"].mean())
    return df
Enter fullscreen mode Exit fullscreen mode

Product Trend Tracking

import sqlite3
from datetime import datetime

def track_product(keyword):
    """Record today's AliExpress listings for *keyword* in ``sourcing.db``.

    Creates the ``products`` table if it does not exist, scrapes one page
    of results via ``scrape_aliexpress``, and inserts one row per product
    stamped with the current local date (``YYYY-MM-DD``).

    Args:
        keyword: Search phrase to scrape and store alongside each row.

    Returns:
        None.
    """
    conn = sqlite3.connect("sourcing.db")
    try:
        conn.execute("""
        CREATE TABLE IF NOT EXISTS products (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            keyword TEXT, title TEXT,
            price TEXT, source TEXT, date TEXT
        )
    """)
        products = scrape_aliexpress(keyword, pages=1)

        # Compute the date once so every row of this run carries the same
        # value even if the run crosses midnight.
        today = datetime.now().strftime("%Y-%m-%d")
        rows = [
            (keyword, p["title"], p["price"], p["source"], today)
            for p in products
        ]

        # The connection context manager commits on success and rolls back
        # if an insert fails; it does not close the connection.
        with conn:
            conn.executemany(
                "INSERT INTO products (keyword, title, price, source, date) VALUES (?, ?, ?, ?, ?)",
                rows,
            )
    finally:
        # Always release the database handle, even when scraping or an
        # insert raises.
        conn.close()
Enter fullscreen mode Exit fullscreen mode

Scaling

For large-scale sourcing, use ThorData for residential proxies or ScrapeOps to manage and monitor your scraping infrastructure.

Conclusion

Scraping Alibaba and AliExpress automates product sourcing. Compare prices, track trends, and find profitable products systematically.


Happy sourcing! Follow for more e-commerce automation tutorials.

Top comments (0)