How to Scrape Etsy: Products, Sellers, and Sales Data

#python #tutorial #webdev #programming

How to Scrape Etsy: Products, Sellers, and Sales Data

Etsy is a goldmine for e-commerce research. Whether you are analyzing trending products, studying pricing strategies, or building a competitive intelligence tool, scraping Etsy gives you the data you need.

What You Can Extract

Product listings with prices, descriptions, and images
Seller information and shop statistics
Review counts and ratings
Sales estimates and trending items
Category and tag data

Setup

pip install requests beautifulsoup4 pandas

Product Search Scraper

import json
import re
import time
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup

class EtsyScraper:
    """Minimal Etsy search scraper built on a shared ``requests`` session.

    Args:
        proxy_key: Optional ScraperAPI key. When set, every request is sent
            to the ScraperAPI endpoint with the target URL as a parameter
            instead of being sent to the target directly.
    """

    def __init__(self, proxy_key=None):
        self.session = requests.Session()
        self.proxy_key = proxy_key
        self.base_url = "https://www.etsy.com"
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept-Language": "en-US,en;q=0.9"
        })

    def fetch(self, url):
        """GET ``url`` and return the response object.

        With a ``proxy_key`` the request goes through the proxy endpoint
        (30 s timeout); otherwise it goes straight to ``url`` (15 s timeout).
        """
        if self.proxy_key:
            # The target URL must be percent-encoded: left raw, any "&" or "?"
            # inside it is parsed as a parameter of the proxy request and the
            # proxy receives a truncated target URL.
            # NOTE(review): this endpoint is plain HTTP, so the API key is sent
            # unencrypted -- confirm whether the provider's HTTPS endpoint can
            # be used instead.
            proxy_params = urlencode({"api_key": self.proxy_key, "url": url})
            api_url = f"http://api.scraperapi.com?{proxy_params}"
            return self.session.get(api_url, timeout=30)
        return self.session.get(url, timeout=15)

    def search_products(self, query, max_pages=3, sort="most_relevant"):
        """Collect products from up to ``max_pages`` search result pages.

        Args:
            query: Free-text search term; it is URL-encoded here, so pass it
                unencoded (spaces, "&", non-ASCII characters are fine).
            max_pages: Maximum number of result pages to request.
            sort: Value for the ``order`` query parameter.

        Returns:
            A list of product dicts as produced by ``parse_search_results``.
            Paging stops early on the first non-200 response or the first
            page that yields no products.
        """
        all_products = []
        for page in range(1, max_pages + 1):
            search_params = urlencode({"q": query, "page": page, "order": sort})
            url = f"{self.base_url}/search?{search_params}"
            response = self.fetch(url)

            if response.status_code != 200:
                break

            products = self.parse_search_results(response.text)
            if not products:
                break

            all_products.extend(products)
            # Pause between pages to avoid hammering the site.
            time.sleep(2)

        return all_products

    def parse_search_results(self, html):
        """Extract one dict per ``[data-listing-id]`` element in ``html``.

        Each dict has the keys "id", "title", "price", "shop", "rating" and
        "url". Missing title/shop become "", missing price 0.0 and missing
        rating 0. A listing whose fields raise ``AttributeError`` or
        ``ValueError`` while being parsed is skipped.
        """
        soup = BeautifulSoup(html, "html.parser")
        products = []

        listings = soup.select("[data-listing-id]")
        for listing in listings:
            try:
                listing_id = listing.get("data-listing-id", "")
                title_el = listing.select_one(".v2-listing-card__title")
                price_el = listing.select_one(".currency-value")
                shop_el = listing.select_one(".v2-listing-card__shop")

                # The rating is read from the first number in the star
                # widget's aria-label.
                rating_el = listing.select_one(".stars-svg")
                rating = 0
                if rating_el:
                    label = rating_el.get("aria-label", "")
                    match = re.search(r"([0-9.]+)", label)
                    if match:
                        rating = float(match.group(1))

                products.append({
                    "id": listing_id,
                    "title": title_el.text.strip() if title_el else "",
                    "price": self.parse_price(price_el),
                    "shop": shop_el.text.strip() if shop_el else "",
                    "rating": rating,
                    "url": f"{self.base_url}/listing/{listing_id}"
                })
            except (AttributeError, ValueError):
                continue

        return products

    def parse_price(self, el):
        """Return the element's text as a float, or 0.0 if absent/unparseable.

        Thousands separators (",") are stripped before conversion.
        """
        if el:
            try:
                return float(el.text.strip().replace(",", ""))
            except ValueError:
                pass
        return 0.0

Product Detail Scraper

class ProductDetailScraper(EtsyScraper):
    """Scraper for the detail page of a single listing."""

    @staticmethod
    def _first_number(text, pattern, cast):
        """Return the first ``pattern`` match in ``text`` converted by ``cast``.

        Commas are stripped before conversion. Returns 0 when nothing matches
        or the matched text is not a valid number (e.g. a lone "." or ",").
        """
        match = re.search(pattern, text)
        if not match:
            return 0
        try:
            return cast(match.group().replace(",", ""))
        except ValueError:
            return 0

    def get_product_details(self, listing_url):
        """Fetch ``listing_url`` and extract the listing's main fields.

        Args:
            listing_url: URL of the listing page, passed to ``fetch`` as-is.

        Returns:
            A dict that always contains the keys "title", "description",
            "price", "reviews", "sales" and "tags". Fields that cannot be
            found or parsed fall back to "", 0 or an empty list, so every
            result has the same shape.
        """
        response = self.fetch(listing_url)
        soup = BeautifulSoup(response.text, "html.parser")

        # Seed every key up front: callers (and DataFrame construction) can
        # rely on a fixed schema even when an element is missing from the page.
        details = {
            "title": "",
            "description": "",
            "price": 0,
            "reviews": 0,
            "sales": 0,
            "tags": [],
        }

        # Title and description
        title_el = soup.select_one("h1")
        if title_el:
            details["title"] = title_el.text.strip()

        desc_el = soup.select_one("[data-id=description-text]")
        if desc_el:
            details["description"] = desc_el.text.strip()

        # Price
        price_el = soup.select_one(".wt-text-title-03")
        if price_el:
            details["price"] = self._first_number(price_el.text, r"[0-9,.]+", float)

        # Reviews: the attribute may be present but empty or non-numeric.
        reviews_el = soup.select_one("[data-reviews-total]")
        if reviews_el:
            try:
                details["reviews"] = int(reviews_el.get("data-reviews-total", 0))
            except ValueError:
                details["reviews"] = 0

        # Sales: use the first caption element that mentions "sale" rather
        # than only inspecting the first caption element on the page.
        for caption_el in soup.select(".wt-text-caption"):
            if "sale" in caption_el.text.lower():
                details["sales"] = self._first_number(caption_el.text, r"([0-9,]+)", int)
                break

        # Tags
        details["tags"] = [tag.text.strip() for tag in soup.select("[data-tag]")]

        return details

Shop Analytics

class ShopScraper(EtsyScraper):
    """Scraper for a shop's landing page."""

    def get_shop_info(self, shop_name):
        """Fetch ``/shop/<shop_name>`` and extract summary statistics.

        Args:
            shop_name: Shop identifier as it appears in the shop URL.

        Returns:
            A dict that always contains "name", "total_sales", "rating" and
            "active_listings". "total_sales" and "rating" are 0 when the
            element is missing or its text holds no valid number.
            "active_listings" is the number of ``[data-listing-id]`` elements
            in the fetched page's HTML.
        """
        url = f"{self.base_url}/shop/{shop_name}"
        response = self.fetch(url)
        soup = BeautifulSoup(response.text, "html.parser")

        # Seed the numeric keys so the result has the same shape whether or
        # not the elements are present on the page.
        info = {"name": shop_name, "total_sales": 0, "rating": 0}

        sales_el = soup.select_one(".shop-sales-count")
        if sales_el:
            match = re.search(r"([0-9,]+)", sales_el.text)
            # A match made only of commas leaves no digits to convert.
            digits = match.group().replace(",", "") if match else ""
            if digits:
                info["total_sales"] = int(digits)

        rating_el = soup.select_one(".stars-svg")
        if rating_el:
            match = re.search(r"([0-9.]+)", rating_el.get("aria-label", ""))
            if match:
                try:
                    info["rating"] = float(match.group())
                except ValueError:
                    # The pattern also matches text such as "." or "1.2.3";
                    # keep the default of 0 in that case.
                    info["rating"] = 0

        listings = soup.select("[data-listing-id]")
        info["active_listings"] = len(listings)

        return info

Data Analysis

import pandas as pd

# Run a sample search and summarise the results with pandas.
scraper = EtsyScraper()
products = scraper.search_products("handmade candles", max_pages=5)
df = pd.DataFrame(products)

print(f"Total products: {len(df)}")

if df.empty:
    # search_products returns [] when the first request is not a 200 or the
    # page yields no listings. A DataFrame built from [] has no "price" or
    # "shop" column, so the statistics below would raise KeyError.
    print("No products found; skipping price statistics and CSV export.")
else:
    print(f"Price range: ${df['price'].min():.2f} - ${df['price'].max():.2f}")
    print(f"Average price: ${df['price'].mean():.2f}")
    print(f"Median price: ${df['price'].median():.2f}")
    print("\nTop shops by listings:")
    print(df["shop"].value_counts().head(10))

    df.to_csv("etsy_products.csv", index=False)

Handling Anti-Scraping

Etsy uses Cloudflare and other protections, so plain requests are often blocked. These services can help:

ScraperAPI — bypasses Cloudflare, handles CAPTCHAs
ThorData — residential proxies for consistent access
ScrapeOps — track success rates and get alerts

Conclusion

Etsy scraping reveals market trends, pricing strategies, and competitive dynamics in the handmade/vintage marketplace. Use this data to find profitable niches or optimize your own shop.

Follow for more e-commerce scraping guides!

DEV Community

How to Scrape Etsy: Products, Sellers, and Sales Data