Ever wished you could instantly compare prices across hundreds of wholesale suppliers without manually browsing through pages? In this tutorial, we'll build a practical price comparison tool using real product data from Yiwugo.com — China's largest small commodities marketplace.
By the end, you'll have a working Python script that scrapes product data, normalizes prices, and generates comparison reports you can actually use for sourcing decisions.
Why Price Comparison Matters in Wholesale
When sourcing from Yiwugo, you'll often find dozens of suppliers selling similar products at different price points. The differences aren't just about price — minimum order quantities (MOQs), shipping terms, and supplier ratings all factor in.
Manually comparing these across 50+ listings is tedious. Automating it saves hours and catches deals you'd otherwise miss.
Architecture Overview
Our tool has three stages:
- Data Collection — Scrape product listings from Yiwugo using the Apify actor
- Data Processing — Normalize prices, calculate unit costs, score suppliers
- Report Generation — Output a ranked comparison as CSV and terminal table
Step 1: Collect Product Data
We'll use the Yiwugo Scraper on Apify to gather product data. Install the Apify client first:
pip install apify-client
Here's the data collection script:
from apify_client import ApifyClient
import json
def collect_products(keyword, max_items=50):
    """Scrape Yiwugo products for a given keyword.

    Runs the Yiwugo scraper actor through the Apify client and returns
    the items from the run's default dataset as a list of dicts.
    """
    client = ApifyClient("YOUR_APIFY_TOKEN")
    actor_input = {
        "keyword": keyword,
        "maxItems": max_items,
        "sortType": "general",
    }
    # Start the actor and block until the run finishes.
    run = client.actor("jungle_intertwining/yiwugo-scraper").call(run_input=actor_input)
    # Drain the dataset produced by the run into memory.
    dataset = client.dataset(run["defaultDatasetId"])
    results = list(dataset.iterate_items())
    print(f"Collected {len(results)} products for '{keyword}'")
    return results
A single call gives you structured data including product name, price range, MOQ, supplier info, and shop ratings.
Step 2: Normalize and Score
Raw data needs cleaning. Prices on Yiwugo are typically shown as ranges (e.g., ¥2.50 - ¥4.80). We need to normalize these for fair comparison:
import re
def parse_price(price_str):
    """Extract the first numeric price from a string like '¥2.50' or '2.50元'.

    For range strings such as '¥2.50 - ¥4.80' the first (minimum) value
    is returned.

    Args:
        price_str: Raw price text from a listing; may be None or empty.

    Returns:
        The leading price as a float, or None when no parseable number
        is present.
    """
    if not price_str:
        return None
    for token in re.findall(r'[\d.]+', str(price_str)):
        try:
            return float(token)
        except ValueError:
            # Token was dots only (e.g. an ellipsis '...'); the original
            # code crashed here — skip it and try the next match instead.
            continue
    return None
def calculate_score(product):
    """Score a product based on price, MOQ, and supplier quality."""
    total = 100.0

    # Price factor: cheaper listings are penalized less (cap at 30 points).
    lowest = parse_price(product.get("priceMin"))
    if lowest:
        total -= min(lowest / 10, 30)

    # MOQ factor: reward small minimum orders, penalize very large ones.
    moq = product.get("minOrder", 0)
    if isinstance(moq, (int, float)) and moq > 0:
        if moq <= 10:
            total += 10
        elif moq <= 100:
            total += 5
        elif moq > 1000:
            total -= 10

    # Supplier quality: up to 10 bonus points from the shop level.
    level = product.get("shopLevel", 0)
    if isinstance(level, (int, float)):
        total += min(level * 2, 10)

    # Convenience bonus when online ordering is available.
    if product.get("onlineOrder"):
        total += 5

    return round(total, 1)
def process_products(items):
    """Process raw items into scored, sorted results."""
    results = []
    for raw in items:
        low = parse_price(raw.get("priceMin"))
        high = parse_price(raw.get("priceMax"))
        # Average only when both ends of the range are known.
        if low and high:
            midpoint = round((low + high) / 2, 2)
        else:
            midpoint = low
        results.append({
            "name": raw.get("title", "Unknown")[:60],
            "supplier": raw.get("shopName", "Unknown"),
            "min_price": low,
            "max_price": high,
            "avg_price": midpoint,
            "currency": raw.get("currency", "CNY"),
            "moq": raw.get("minOrder", "N/A"),
            "shop_level": raw.get("shopLevel", 0),
            "online_order": raw.get("onlineOrder", False),
            "url": raw.get("url", ""),
            "score": calculate_score(raw),
        })
    # Best-scoring offers first.
    results.sort(key=lambda entry: entry["score"], reverse=True)
    return results
The scoring formula weighs three factors:
- Price — Lower prices score higher
- MOQ — Lower minimums are better for testing new products
- Supplier quality — Higher shop levels indicate more established suppliers
Step 3: Generate Comparison Report
Now let's output the results in a useful format:
import csv
from datetime import datetime
def print_comparison(products, keyword):
    """Print a formatted comparison table to terminal."""
    rule = "=" * 80
    print(f"\n{rule}")
    print(f" PRICE COMPARISON: {keyword.upper()}")
    print(f" Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print(f" Products analyzed: {len(products)}")
    print(f"{rule}\n")

    print(f"{'Rank':<5} {'Score':<7} {'Price (CNY)':<14} "
          f"{'MOQ':<8} {'Supplier':<25} {'Level':<6}")
    print("-" * 75)

    # Table shows at most the top 20 entries.
    for rank, entry in enumerate(products[:20], start=1):
        low, high = entry["min_price"], entry["max_price"]
        label = f"{low:.2f}" if low else "N/A"
        if high and high != low:
            label += f"-{high:.2f}"
        stars = "⭐" * min(int(entry["shop_level"] or 0), 5)
        print(f"{rank:<5} {entry['score']:<7} {label:<14} "
              f"{str(entry['moq']):<8} {entry['supplier'][:24]:<25} "
              f"{stars}")

    known = [entry["min_price"] for entry in products if entry["min_price"]]
    if known:
        print(f"\n📊 Price range: ¥{min(known):.2f} - ¥{max(known):.2f}")
        print(f"📊 Average: ¥{sum(known)/len(known):.2f}")
        print(f"📊 Median: ¥{sorted(known)[len(known)//2]:.2f}")
def export_csv(products, keyword):
    """Export the comparison to a dated CSV file.

    Args:
        products: Processed product dicts (as built by process_products).
        keyword: Search keyword; spaces become underscores in the filename.

    Returns:
        The name of the CSV file that was written.
    """
    filename = f"comparison_{keyword.replace(' ', '_')}_{datetime.now():%Y%m%d}.csv"
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=[
            "name", "supplier", "min_price", "max_price",
            "avg_price", "currency", "moq", "shop_level",
            "online_order", "score", "url"
        ])
        writer.writeheader()
        writer.writerows(products)
    # Bug fix: the message previously printed a literal placeholder
    # "(unknown)" instead of the actual filename.
    print(f"\n✅ Exported to {filename}")
    return filename
Putting It All Together
Here's the complete workflow:
def compare_prices(keyword, max_items=50):
    """Full pipeline: collect → process → report.

    Args:
        keyword: Search term to look up on Yiwugo.
        max_items: Maximum number of listings to fetch.

    Returns:
        The processed, score-sorted product list, or None when the
        search returned no items.
    """
    print(f"🔍 Searching Yiwugo for: {keyword}")
    # Collect
    raw_items = collect_products(keyword, max_items)
    if not raw_items:
        print("No products found.")
        return
    # Process
    products = process_products(raw_items)
    # Report
    print_comparison(products, keyword)
    export_csv(products, keyword)
    # Top recommendation. min_price can be None when the listing had no
    # parseable price; guard so the ":.2f" format cannot raise TypeError.
    best = products[0]
    best_price = f"¥{best['min_price']:.2f}" if best['min_price'] else "N/A"
    print(f"\n🏆 Top pick: {best['supplier']}")
    print(f"   Price: {best_price} | "
          f"MOQ: {best['moq']} | Score: {best['score']}")
    print(f"   Link: {best['url']}")
    return products
if __name__ == "__main__":
    # Compare USB cables across suppliers.
    results = compare_prices("USB数据线", max_items=50)

    # The same pipeline works for any number of categories.
    for category in ["蓝牙耳机", "手机壳", "LED灯"]:
        compare_prices(category, max_items=30)
Running this produces output like:
================================================================================
PRICE COMPARISON: USB数据线
Generated: 2026-02-12 15:30
Products analyzed: 47
================================================================================
Rank Score Price (CNY) MOQ Supplier Level
---------------------------------------------------------------------------
1 97.3 1.20-2.50 10 义乌市创达电子商行 ⭐⭐⭐⭐
2 94.1 1.50-3.00 20 金华市恒通数码科技 ⭐⭐⭐⭐⭐
3 91.8 0.80-1.80 100 浙江优品电子有限公司 ⭐⭐⭐
...
📊 Price range: ¥0.50 - ¥15.80
📊 Average: ¥3.42
📊 Median: ¥2.80
Advanced: Track Price Changes Over Time
Want to monitor how prices shift? Add a simple history tracker:
import os
# Directory where timestamped price snapshots are stored.
HISTORY_DIR = "price_history"

def save_snapshot(products, keyword):
    """Write the current product list to a timestamped JSON snapshot."""
    os.makedirs(HISTORY_DIR, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M")
    target = f"{HISTORY_DIR}/{keyword}_{stamp}.json"
    with open(target, "w", encoding="utf-8") as handle:
        json.dump(products, handle, ensure_ascii=False, indent=2)
def compare_with_previous(products, keyword):
    """Compare current prices with the most recent snapshot."""
    import glob
    snapshots = sorted(glob.glob(f"{HISTORY_DIR}/{keyword}_*.json"))
    if len(snapshots) < 2:
        print("Not enough history for comparison yet.")
        return

    # The newest file is assumed to be the current run, so the
    # second-newest is the previous snapshot to diff against.
    with open(snapshots[-2], encoding="utf-8") as handle:
        prior = {entry["url"]: entry for entry in json.load(handle)}

    deltas = []
    for current in products:
        earlier = prior.get(current["url"])
        if earlier is None:
            continue
        was = earlier.get("min_price", 0)
        now = current.get("min_price", 0)
        if was and now and was != now:
            deltas.append({
                "name": current["name"],
                "old": was,
                "new": now,
                "change_pct": round((now - was) / was * 100, 1),
            })

    if not deltas:
        print("\n✅ No price changes since last check.")
        return
    print(f"\n📈 Price changes detected ({len(deltas)} products):")
    for delta in deltas:
        marker = "🔴" if delta["change_pct"] > 0 else "🟢"
        print(f"  {marker} {delta['name'][:40]}: "
              f"¥{delta['old']:.2f} → ¥{delta['new']:.2f} "
              f"({delta['change_pct']:+.1f}%)")
What You Can Do With This
Once you have structured comparison data, the possibilities open up:
- Sourcing automation — Set alerts when prices drop below your target
- Supplier vetting — Filter by shop level and MOQ to find reliable partners
- Market research — Track price trends across product categories over weeks
- Competitive analysis — Compare your current supplier's prices against the market
Get Started
The Yiwugo Scraper handles all the heavy lifting — anti-bot bypassing, pagination, data normalization. You just write the business logic on top.
Full source code from this tutorial is available on GitHub.
📚 Related: New to scraping Chinese platforms? Read Scraping Chinese E-commerce Sites: Challenges and Solutions for essential tips on handling anti-bot systems and encoding quirks.
Building tools for wholesale data? I'd love to hear what you're working on. Drop a comment below or check out the Yiwugo Scraper on Apify Store.
📦 Also check out: DHgate Scraper — Extract DHgate product data for dropshipping research.
- Made-in-China Scraper — Extract B2B product data, supplier info, and MOQ from Made-in-China.com
📚 More on wholesale data:
Top comments (0)