DEV Community

agenthustler
agenthustler

Posted on

How to Scrape Etsy: Products, Sellers, and Sales Data

How to Scrape Etsy: Products, Sellers, and Sales Data

Etsy is a goldmine for e-commerce research. Whether you are analyzing trending products, studying pricing strategies, or building a competitive intelligence tool, scraping Etsy gives you the data you need.

What You Can Extract

  • Product listings with prices, descriptions, and images
  • Seller information and shop statistics
  • Review counts and ratings
  • Sales estimates and trending items
  • Category and tag data

Setup

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

Product Search Scraper

import json
import re
import time
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup

class EtsyScraper:
    """Scrape Etsy search result pages into plain dictionaries.

    Requests go directly to Etsy, or through the ScraperAPI endpoint when a
    ``proxy_key`` is supplied to the constructor.
    """

    # NOTE(review): the API key travels over plain HTTP with this endpoint;
    # confirm whether the provider's HTTPS endpoint can be used instead.
    PROXY_ENDPOINT = "http://api.scraperapi.com"

    # A rating is digits with an optional fractional part. Requiring a digit
    # on both sides of the dot keeps sentence punctuation ("4.5.") out of the
    # match, which would otherwise make float() raise.
    _RATING_RE = re.compile(r"\d+(?:\.\d+)?")

    def __init__(self, proxy_key=None):
        """Create a session with browser-like headers.

        Args:
            proxy_key: Optional proxy API key. When set, every request made
                by :meth:`fetch` is routed through ``PROXY_ENDPOINT``.
        """
        self.session = requests.Session()
        self.proxy_key = proxy_key
        self.base_url = "https://www.etsy.com"
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept-Language": "en-US,en;q=0.9"
        })

    def fetch(self, url):
        """GET *url* and return the response object.

        With a proxy key the target URL is passed as the ``url`` query
        parameter of the proxy endpoint (30 s timeout); otherwise the URL is
        requested directly (15 s timeout).
        """
        if self.proxy_key:
            # Let the HTTP client encode the parameters: the target URL has
            # its own "&"-separated query string, which would otherwise be
            # read as extra parameters of the proxy request.
            return self.session.get(
                self.PROXY_ENDPOINT,
                params={"api_key": self.proxy_key, "url": url},
                timeout=30,
            )
        return self.session.get(url, timeout=15)

    def _search_url(self, query, page, sort):
        """Build the search URL for one result page, escaping every value."""
        params = urlencode({"q": query, "page": page, "order": sort})
        return f"{self.base_url}/search?{params}"

    def search_products(self, query, max_pages=3, sort="most_relevant"):
        """Collect products from up to *max_pages* search result pages.

        Paging stops early at the first non-200 response or the first page
        that yields no listings.

        Args:
            query: Free-text search term; it is URL-encoded here.
            max_pages: Highest page number to request (pages start at 1).
            sort: Value sent as the ``order`` query parameter.

        Returns:
            A list of product dictionaries as produced by
            :meth:`parse_search_results`.
        """
        all_products = []
        for page in range(1, max_pages + 1):
            response = self.fetch(self._search_url(query, page, sort))

            if response.status_code != 200:
                break

            products = self.parse_search_results(response.text)
            if not products:
                break

            all_products.extend(products)
            # Pause between requests only; there is nothing to wait for
            # after the final page.
            if page < max_pages:
                time.sleep(2)

        return all_products

    def _parse_rating(self, rating_el):
        """Return the number in the element's ``aria-label``, or 0 if absent."""
        if rating_el is None:
            return 0
        match = self._RATING_RE.search(rating_el.get("aria-label", ""))
        return float(match.group()) if match else 0

    def _parse_listing(self, listing):
        """Turn one element carrying ``data-listing-id`` into a product dict."""
        listing_id = listing.get("data-listing-id", "")
        title_el = listing.select_one(".v2-listing-card__title")
        price_el = listing.select_one(".currency-value")
        shop_el = listing.select_one(".v2-listing-card__shop")
        rating_el = listing.select_one(".stars-svg")

        return {
            "id": listing_id,
            "title": title_el.text.strip() if title_el is not None else "",
            "price": self.parse_price(price_el),
            "shop": shop_el.text.strip() if shop_el is not None else "",
            "rating": self._parse_rating(rating_el),
            "url": f"{self.base_url}/listing/{listing_id}"
        }

    def parse_search_results(self, html):
        """Parse a search result page.

        Args:
            html: Page markup as text.

        Returns:
            One dictionary per element that has a ``data-listing-id``
            attribute, with keys ``id``, ``title``, ``price``, ``shop``,
            ``rating`` and ``url``. Missing text fields are ``""`` and
            missing numbers are zero.
        """
        soup = BeautifulSoup(html, "html.parser")
        products = []

        for listing in soup.select("[data-listing-id]"):
            try:
                products.append(self._parse_listing(listing))
            except (AttributeError, ValueError):
                # Best effort: one malformed card must not discard the page.
                continue

        return products

    def parse_price(self, el):
        """Return the element's text as a float, or 0.0 when it is unusable.

        Thousands separators (",") are removed before conversion.
        """
        if el is not None:
            try:
                return float(el.text.strip().replace(",", ""))
            except ValueError:
                pass
        return 0.0
Enter fullscreen mode Exit fullscreen mode

Product Detail Scraper

class ProductDetailScraper(EtsyScraper):
    """Scrape a single listing page into a flat dictionary."""

    # Both patterns must start on a digit so that stray punctuation in the
    # surrounding text ("Now, $12") can never be captured on its own and
    # make the numeric conversion raise.
    _PRICE_RE = re.compile(r"\d[\d,]*(?:\.\d+)?")
    _COUNT_RE = re.compile(r"\d[\d,]*")

    def _to_int(self, text):
        """Return the first integer in *text* (commas ignored), or 0."""
        match = self._COUNT_RE.search(text or "")
        return int(match.group().replace(",", "")) if match else 0

    def get_product_details(self, listing_url):
        """Fetch *listing_url* and extract the listing's details.

        Args:
            listing_url: Absolute URL of the listing page.

        Returns:
            A dictionary that always has the keys ``title``,
            ``description``, ``price``, ``reviews``, ``sales`` and ``tags``.
            Text that cannot be found is ``""``, numbers are zero and tags
            are an empty list.
        """
        response = self.fetch(listing_url)
        soup = BeautifulSoup(response.text, "html.parser")

        # Start from a complete record so callers (and DataFrame columns)
        # see the same keys whether or not an element was found.
        details = {
            "title": "",
            "description": "",
            "price": 0.0,
            "reviews": 0,
            "sales": 0,
            "tags": [],
        }

        # Title and description
        title_el = soup.select_one("h1")
        if title_el is not None:
            details["title"] = title_el.text.strip()

        desc_el = soup.select_one("[data-id=description-text]")
        if desc_el is not None:
            details["description"] = desc_el.text.strip()

        # Price
        price_el = soup.select_one(".wt-text-title-03")
        if price_el is not None:
            match = self._PRICE_RE.search(price_el.text)
            if match:
                details["price"] = float(match.group().replace(",", ""))

        # Reviews: the attribute is parsed leniently so a formatted value
        # such as "1,234" does not raise.
        reviews_el = soup.select_one("[data-reviews-total]")
        if reviews_el is not None:
            details["reviews"] = self._to_int(reviews_el.get("data-reviews-total", ""))

        # Sales: several elements can share the caption class, so scan all
        # of them instead of trusting the first one on the page.
        for caption_el in soup.select(".wt-text-caption"):
            caption = caption_el.text
            if "sale" in caption.lower() and self._COUNT_RE.search(caption):
                details["sales"] = self._to_int(caption)
                break

        # Tags
        details["tags"] = [tag.text.strip() for tag in soup.select("[data-tag]")]

        return details
Enter fullscreen mode Exit fullscreen mode

Shop Analytics

class ShopScraper(EtsyScraper):
    """Scrape summary figures from a shop's front page."""

    # Each pattern must start on a digit so punctuation in the surrounding
    # text is never captured alone, which would make int()/float() raise.
    _SALES_RE = re.compile(r"\d[\d,]*")
    _SHOP_RATING_RE = re.compile(r"\d+(?:\.\d+)?")

    def get_shop_info(self, shop_name):
        """Fetch ``/shop/<shop_name>`` and extract shop statistics.

        Args:
            shop_name: Shop identifier as it appears in the shop URL.

        Returns:
            A dictionary that always has the keys ``name``, ``total_sales``,
            ``rating`` and ``active_listings``. Figures that cannot be found
            are zero. ``active_listings`` is the number of elements with a
            ``data-listing-id`` attribute in the fetched page, not a figure
            reported by the shop itself.
        """
        url = f"{self.base_url}/shop/{shop_name}"
        response = self.fetch(url)
        soup = BeautifulSoup(response.text, "html.parser")

        # Start from a complete record so every result has the same keys.
        info = {
            "name": shop_name,
            "total_sales": 0,
            "rating": 0,
            "active_listings": 0,
        }

        sales_el = soup.select_one(".shop-sales-count")
        if sales_el is not None:
            match = self._SALES_RE.search(sales_el.text)
            if match:
                info["total_sales"] = int(match.group().replace(",", ""))

        rating_el = soup.select_one(".stars-svg")
        if rating_el is not None:
            match = self._SHOP_RATING_RE.search(rating_el.get("aria-label", ""))
            if match:
                info["rating"] = float(match.group())

        info["active_listings"] = len(soup.select("[data-listing-id]"))

        return info
Enter fullscreen mode Exit fullscreen mode

Data Analysis

import pandas as pd

# Scrape a sample search and summarise prices and the most frequent shops.
scraper = EtsyScraper()
products = scraper.search_products("handmade candles", max_pages=5)
df = pd.DataFrame(products)

print(f"Total products: {len(df)}")
if df.empty:
    # A DataFrame built from an empty list has no columns, so df['price']
    # would raise KeyError; report the empty result instead.
    print("No products were scraped; skipping price statistics and CSV export.")
else:
    print(f"Price range: ${df['price'].min():.2f} - ${df['price'].max():.2f}")
    print(f"Average price: ${df['price'].mean():.2f}")
    print(f"Median price: ${df['price'].median():.2f}")
    print("\nTop shops by listings:")
    print(df["shop"].value_counts().head(10))

    df.to_csv("etsy_products.csv", index=False)
Enter fullscreen mode Exit fullscreen mode

Handling Anti-Scraping

Etsy uses Cloudflare and other anti-bot protections, so plain requests are often blocked. These services can help:

  • ScraperAPI — bypasses Cloudflare, handles CAPTCHAs
  • ThorData — residential proxies for consistent access
  • ScrapeOps — track success rates and get alerts

Conclusion

Etsy scraping reveals market trends, pricing strategies, and competitive dynamics in the handmade/vintage marketplace. Use this data to find profitable niches or optimize your own shop.


Follow for more e-commerce scraping guides!

Top comments (0)