Scraping Shopify Stores: Products, Prices, and Inventory
Shopify powers over 4 million online stores. For competitive intelligence, price monitoring, or market research, scraping Shopify stores is incredibly valuable. The good news: Shopify has a predictable URL structure that makes scraping straightforward.
The Shopify JSON Trick
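Most Shopify stores expose their product catalog through a public but little-advertised JSON endpoint. Just append /products.json to the store's base URL (stores that are password-protected or have disabled the endpoint will not return JSON):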
https://store-name.com/products.json
https://store-name.com/products.json?page=2&limit=250
This is the easiest e-commerce scraping you will ever do.
Setup
pip install requests pandas
Basic Product Scraper
import requests
import pandas as pd
import time
def scrape_shopify_store(store_url, max_pages=10):
    """Scrape a store's public ``/products.json`` feed into a DataFrame.

    Pages are requested one at a time (250 products per page) until
    ``max_pages`` is reached, a page comes back empty, the server answers
    with a non-200 status, or the body is not valid JSON.

    Args:
        store_url: Base URL of the store; a trailing slash is tolerated.
        max_pages: Maximum number of pages to request.

    Returns:
        A ``pandas.DataFrame`` with one row per product *variant* and the
        columns ``title``, ``vendor``, ``product_type``, ``variant_title``,
        ``price``, ``compare_at_price``, ``sku``, ``available``,
        ``created_at``, ``updated_at`` and ``tags``. The frame is empty
        (no columns) when nothing could be scraped.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    store_url = store_url.rstrip("/")
    all_products = []
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}

    for page in range(1, max_pages + 1):
        url = f"{store_url}/products.json?page={page}&limit=250"
        # A finite timeout keeps one unresponsive store from hanging the run.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            break

        try:
            data = response.json()
        except ValueError:
            # A 200 response with a non-JSON body (for example an HTML page)
            # is treated like any other "no more data" signal.
            break

        products = data.get("products", [])
        if not products:
            break

        for product in products:
            tags = product.get("tags") or []
            # Tags may arrive as a list or as an already-joined string;
            # joining a string would insert a comma between every character.
            tags_text = tags if isinstance(tags, str) else ",".join(tags)

            for variant in product.get("variants", []):
                all_products.append({
                    "title": product["title"],
                    "vendor": product.get("vendor", ""),
                    "product_type": product.get("product_type", ""),
                    "variant_title": variant.get("title", ""),
                    # `or 0` also covers a present-but-null/empty price,
                    # which float() would otherwise reject.
                    "price": float(variant.get("price") or 0),
                    "compare_at_price": variant.get("compare_at_price"),
                    "sku": variant.get("sku", ""),
                    "available": variant.get("available", False),
                    "created_at": product.get("created_at", ""),
                    "updated_at": product.get("updated_at", ""),
                    "tags": tags_text,
                })

        print(f"Page {page}: {len(products)} products")
        # Pause between pages to stay polite to the store's server.
        time.sleep(1)

    return pd.DataFrame(all_products)
# Example run against a placeholder store URL; `df` is reused by the later analysis snippet.
df = scrape_shopify_store("https://example-store.myshopify.com")
# The scraper emits one row per variant, so this counts variants, not products.
print(f"Total variants: {len(df)}")
Detecting Shopify Stores
def is_shopify(url):
    """Report whether the page at ``url`` shows signs of being a Shopify store.

    The page is fetched once (10 second timeout) and three markers are
    checked: the strings "cdn.shopify.com" and "Shopify.theme" in the body,
    and the presence of an ``x-shopify-stage`` response header.

    Returns:
        True if at least one marker is found; False if none is found or if
        anything goes wrong while fetching or inspecting the page.
    """
    try:
        reply = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        body = reply.text
        uses_shopify_cdn = "cdn.shopify.com" in body
        has_theme_object = "Shopify.theme" in body
        has_stage_header = reply.headers.get("x-shopify-stage") is not None
    except Exception:
        # Best-effort probe: any failure simply means "not detected".
        return False
    return uses_shopify_cdn or has_theme_object or has_stage_header
# Live check against a real storefront; prints True or False (False also on any request error).
print(is_shopify("https://allbirds.com"))
Scraping Collections
def scrape_collections(store_url):
    """List a store's collections from its public ``/collections.json`` feed.

    Args:
        store_url: Base URL of the store; a trailing slash is tolerated.

    Returns:
        A list of dicts with the keys ``title``, ``handle``,
        ``products_count`` ("N/A" when the feed omits it) and ``url``.
        The list is empty when the server does not answer with status 200.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    # Normalise once so the request URL and the per-collection links agree;
    # otherwise a trailing slash would yield "//collections/..." links.
    base_url = store_url.rstrip("/")
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(f"{base_url}/collections.json", headers=headers, timeout=10)
    if response.status_code != 200:
        return []

    collections = response.json().get("collections", [])
    return [
        {
            "title": c["title"],
            "handle": c["handle"],
            "products_count": c.get("products_count", "N/A"),
            "url": f"{base_url}/collections/{c['handle']}",
        }
        for c in collections
    ]
Price Monitoring
Track price changes over time:
import json
from datetime import datetime
def monitor_prices(store_url, output_file="price_history.json"):
    """Append a timestamped snapshot of a store's scraped rows to a JSON file.

    The store is scraped with ``scrape_shopify_store``, the rows are wrapped
    together with the current local time, and the result is appended to the
    list stored in ``output_file``. A missing file starts a new history.

    Args:
        store_url: Base URL of the store to scrape.
        output_file: Path of the JSON history file to read and rewrite.
    """
    frame = scrape_shopify_store(store_url)
    taken_at = datetime.now().isoformat()
    entry = {"timestamp": taken_at, "products": frame.to_dict(orient="records")}

    # Load the existing history, or start fresh on the first run.
    try:
        with open(output_file, "r") as source:
            log = json.load(source)
    except FileNotFoundError:
        log = []

    log.append(entry)

    # Rewrite the whole file with the new snapshot included.
    with open(output_file, "w") as target:
        json.dump(log, target, indent=2)

    print(f"Snapshot saved: {len(frame)} products at {taken_at}")
Scaling Across Many Stores
When scraping hundreds of Shopify stores, you will hit rate limits. Use ScraperAPI to rotate IPs and handle blocks:
def scrape_via_proxy(store_url, api_key="YOUR_KEY"):
    """Fetch a store's ``/products.json`` through the ScraperAPI endpoint.

    Args:
        store_url: Base URL of the store; a trailing slash is tolerated.
        api_key: ScraperAPI key. The default is a placeholder that must be
            replaced with a real key.

    Returns:
        The decoded JSON body of the proxied response.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    target_url = f"{store_url.rstrip('/')}/products.json"
    # Pass the target through `params` so it is percent-encoded; embedding it
    # raw would let any "?" or "&" in it be read as proxy parameters.
    response = requests.get(
        "https://api.scraperapi.com",
        params={"api_key": api_key, "url": target_url},
        # Proxied fetches can be slow, so allow a long but finite wait.
        timeout=70,
    )
    return response.json()
For geo-targeted pricing research, ThorData residential proxies let you appear from specific regions. ScrapeOps helps monitor success rates when scaling across multiple stores.
Export and Analysis
# Summarise the scraped variants held in `df` and export them to CSV.
if df.empty:
    # A failed scrape yields a frame with no rows and no columns, so the
    # column lookups below would raise KeyError and the percentage would
    # divide by zero.
    print("No products scraped; skipping analysis and export.")
else:
    print(f"Average price: ${df['price'].mean():.2f}")
    print(f"Price range: ${df['price'].min():.2f} - ${df['price'].max():.2f}")

    total = len(df)
    # Summing the boolean column counts the variants flagged as available.
    available = df["available"].sum()
    print(f"In stock: {available}/{total} ({available/total*100:.1f}%)")

    df.to_csv("shopify_products.csv", index=False)
Legal Note
The /products.json endpoint is publicly accessible by design. However, respect store owners by not hammering their servers: use delays and caching. For commercial use, consider Shopify's official APIs instead (the Storefront API, or the Admin API with the merchant's authorization).
Follow for more e-commerce data collection tutorials!
Top comments (0)