How to Scrape Etsy: Products, Sellers, and Sales Data
Etsy is a goldmine for e-commerce research. Whether you are analyzing trending products, studying pricing strategies, or building a competitive intelligence tool, scraping Etsy gives you the data you need.
What You Can Extract
- Product listings with prices, descriptions, and images
- Seller information and shop statistics
- Review counts and ratings
- Sales estimates and trending items
- Category and tag data
Setup
pip install requests beautifulsoup4 pandas
Product Search Scraper
import json
import re
import time
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
class EtsyScraper:
    """Scrape Etsy search-result pages into plain dictionaries.

    All requests go through one shared ``requests.Session``. When a
    ``proxy_key`` is supplied, requests are routed through the ScraperAPI
    endpoint instead of being sent to the target URL directly.
    """

    def __init__(self, proxy_key=None):
        """Create the HTTP session and install browser-like default headers.

        Args:
            proxy_key: Optional ScraperAPI key. When falsy, ``fetch`` sends
                requests straight to the target URL.
        """
        self.session = requests.Session()
        self.proxy_key = proxy_key
        self.base_url = "https://www.etsy.com"
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept-Language": "en-US,en;q=0.9",
        })

    def fetch(self, url):
        """GET ``url``, via the proxy API when a proxy key is configured.

        Args:
            url: Absolute URL of the page to download.

        Returns:
            Whatever ``self.session.get`` returns (a ``requests.Response``
            for a real session). The status code is not checked here.
        """
        if self.proxy_key:
            # The target URL carries its own "?" and "&". It must be
            # percent-encoded, otherwise everything after its first "&" is
            # read as a parameter of the proxy endpoint, not of the target.
            params = urlencode({"api_key": self.proxy_key, "url": url})
            return self.session.get(f"http://api.scraperapi.com?{params}", timeout=30)
        return self.session.get(url, timeout=15)

    def search_products(self, query, max_pages=3, sort="most_relevant", delay=2):
        """Collect listings from up to ``max_pages`` search-result pages.

        Paging stops early on the first non-200 response or the first page
        that yields no listings.

        Args:
            query: Free-text search term; it is URL-encoded before use.
            max_pages: Maximum number of result pages to request.
            sort: Value sent as the ``order`` query parameter.
            delay: Seconds to wait between consecutive page requests.

        Returns:
            A list of product dicts as produced by ``parse_search_results``.
        """
        all_products = []
        for page in range(1, max_pages + 1):
            if page > 1:
                # Throttle between pages only; no pointless wait after the last one.
                time.sleep(delay)
            params = urlencode({"q": query, "page": page, "order": sort})
            response = self.fetch(f"{self.base_url}/search?{params}")
            if response.status_code != 200:
                break
            products = self.parse_search_results(response.text)
            if not products:
                break
            all_products.extend(products)
        return all_products

    def parse_search_results(self, html):
        """Extract one dict per element carrying a ``data-listing-id`` attribute.

        Args:
            html: Markup of a search-result page.

        Returns:
            A list of dicts with the keys ``id``, ``title``, ``price``,
            ``shop``, ``rating`` and ``url``. Missing text fields become
            ``""``, a missing price ``0.0`` and a missing rating ``0``.
        """
        soup = BeautifulSoup(html, "html.parser")
        products = []
        for listing in soup.select("[data-listing-id]"):
            try:
                listing_id = listing.get("data-listing-id", "")
                title_el = listing.select_one(".v2-listing-card__title")
                price_el = listing.select_one(".currency-value")
                shop_el = listing.select_one(".v2-listing-card__shop")

                # The rating is the first number in the star widget's aria-label.
                rating = 0
                rating_el = listing.select_one(".stars-svg")
                if rating_el:
                    match = re.search(r"([0-9.]+)", rating_el.get("aria-label", ""))
                    if match:
                        rating = float(match.group(1))

                products.append({
                    "id": listing_id,
                    "title": title_el.text.strip() if title_el else "",
                    "price": self.parse_price(price_el),
                    "shop": shop_el.text.strip() if shop_el else "",
                    "rating": rating,
                    "url": f"{self.base_url}/listing/{listing_id}",
                })
            except (AttributeError, ValueError):
                # A card that cannot be parsed is skipped, not fatal.
                continue
        return products

    def parse_price(self, el):
        """Convert a price element's text to ``float``.

        Thousands separators (``,``) are removed before conversion.

        Args:
            el: Object exposing a ``text`` attribute, or a falsy value when
                the price element was not found.

        Returns:
            The parsed price, or ``0.0`` when ``el`` is falsy or its text is
            not a valid number.
        """
        if not el:
            return 0.0
        try:
            return float(el.text.strip().replace(",", ""))
        except ValueError:
            return 0.0
Product Detail Scraper
class ProductDetailScraper(EtsyScraper):
    """Scrape the detail page of a single Etsy listing."""

    @staticmethod
    def _parse_count(text):
        """Return the first comma-grouped integer found in ``text``, or 0."""
        # Requiring a leading digit keeps a stray "," from matching on its own.
        match = re.search(r"[0-9][0-9,]*", text)
        return int(match.group().replace(",", "")) if match else 0

    def get_product_details(self, listing_url):
        """Fetch ``listing_url`` and extract the listing's main fields.

        The CSS selectors encode this scraper's assumptions about Etsy's
        markup; any field whose element is not found falls back to a default.

        Args:
            listing_url: Absolute URL of the listing page.

        Returns:
            A dict that always contains the keys ``title``, ``description``
            (``""`` when missing), ``price`` (``0.0``), ``reviews`` (``0``),
            ``sales`` (``0``) and ``tags`` (``[]``).
        """
        response = self.fetch(listing_url)
        soup = BeautifulSoup(response.text, "html.parser")

        title_el = soup.select_one("h1")
        desc_el = soup.select_one("[data-id=description-text]")
        # Every key is seeded up front so callers never hit a KeyError and
        # tabular exports get the same columns for every listing.
        details = {
            "title": title_el.text.strip() if title_el else "",
            "description": desc_el.text.strip() if desc_el else "",
            "price": 0.0,
            "reviews": 0,
            "sales": 0,
            "tags": [],
        }

        # Price: first number in the price element, thousands separators removed.
        price_el = soup.select_one(".wt-text-title-03")
        if price_el:
            match = re.search(r"[0-9][0-9,]*(?:\.[0-9]+)?", price_el.text)
            if match:
                details["price"] = float(match.group().replace(",", ""))

        # Review total is read from an attribute; tolerate grouping commas.
        reviews_el = soup.select_one("[data-reviews-total]")
        if reviews_el:
            details["reviews"] = self._parse_count(str(reviews_el.get("data-reviews-total", "")))

        # Several caption elements may exist on the page, so scan all of them
        # for the first one that mentions sales and contains a number.
        for caption in soup.select(".wt-text-caption"):
            caption_text = caption.text
            if "sale" in caption_text.lower() and re.search(r"[0-9]", caption_text):
                details["sales"] = self._parse_count(caption_text)
                break

        details["tags"] = [tag.text.strip() for tag in soup.select("[data-tag]")]
        return details
Shop Analytics
class ShopScraper(EtsyScraper):
    """Scrape summary figures from an Etsy shop page."""

    def get_shop_info(self, shop_name):
        """Fetch the page of ``shop_name`` and collect its headline numbers.

        Args:
            shop_name: Shop identifier placed in the ``/shop/<name>`` URL.

        Returns:
            A dict with ``name`` and ``active_listings`` (the number of
            ``data-listing-id`` elements found on the fetched page). The keys
            ``total_sales`` and ``rating`` are present only when their
            elements exist on the page; each is 0 when the element holds no
            number.
        """
        page = self.fetch(f"{self.base_url}/shop/{shop_name}")
        soup = BeautifulSoup(page.text, "html.parser")
        info = {"name": shop_name}

        # Total sales: first run of digits/commas in the sales counter.
        sales_node = soup.select_one(".shop-sales-count")
        if sales_node:
            sales_match = re.search(r"([0-9,]+)", sales_node.text)
            if sales_match:
                info["total_sales"] = int(sales_match.group().replace(",", ""))
            else:
                info["total_sales"] = 0

        # Rating: first number in the star widget's aria-label.
        stars_node = soup.select_one(".stars-svg")
        if stars_node:
            rating_match = re.search(r"([0-9.]+)", stars_node.get("aria-label", ""))
            if rating_match:
                info["rating"] = float(rating_match.group())
            else:
                info["rating"] = 0

        info["active_listings"] = len(soup.select("[data-listing-id]"))
        return info
Data Analysis
import pandas as pd
# Collect up to five pages of search results and summarise their prices.
scraper = EtsyScraper()
products = scraper.search_products("handmade candles", max_pages=5)
df = pd.DataFrame(products)
print(f"Total products: {len(df)}")
if df.empty:
    # An empty result list (e.g. the request was blocked) produces a
    # DataFrame without columns, so df["price"] would raise KeyError.
    print("No products found; skipping price statistics and CSV export.")
else:
    print(f"Price range: ${df['price'].min():.2f} - ${df['price'].max():.2f}")
    print(f"Average price: ${df['price'].mean():.2f}")
    print(f"Median price: ${df['price'].median():.2f}")
    print("\nTop shops by listings:")
    print(df["shop"].value_counts().head(10))
    # Persist the raw rows for later analysis.
    df.to_csv("etsy_products.csv", index=False)
Handling Anti-Scraping
Etsy uses Cloudflare and other anti-bot protections, so plain requests are often blocked. These services can help:
- ScraperAPI — bypasses Cloudflare, handles CAPTCHAs
- ThorData — residential proxies for consistent access
- ScrapeOps — track success rates and get alerts
Conclusion
Etsy scraping reveals market trends, pricing strategies, and competitive dynamics in the handmade/vintage marketplace. Use this data to find profitable niches or optimize your own shop.
Follow for more e-commerce scraping guides!
Top comments (0)