Why Auction Data Matters
Auction prices are the purest signal of market value — what someone actually paid, not what a seller hoped for. Whether you're flipping collectibles, investing in art, or doing market research, programmatic access to auction results is incredibly valuable.
The Challenge
Each platform structures data differently:
- eBay — completed listings with bid histories
- Sotheby's — high-end lots with estimate ranges
- Christie's — detailed provenance and hammer prices
Setup
pip install requests beautifulsoup4 pandas selenium
Auction sites are heavily protected. ScraperAPI handles rotating proxies and browser fingerprinting so you can focus on parsing.
Scraping eBay Completed Listings
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime  # NOTE(review): imported but unused in this snippet

# ScraperAPI proxy endpoint — all requests are routed through it so the
# service can handle rotating proxies and browser fingerprinting.
SCRAPER_API = "http://api.scraperapi.com"
# Replace with your own ScraperAPI key before running.
API_KEY = "YOUR_SCRAPERAPI_KEY"
def scrape_ebay_sold(query, pages=3):
    """Scrape completed/sold listings from eBay search results.

    Parameters
    ----------
    query : str
        Search term; percent-encoded before being placed in the eBay URL.
    pages : int, optional
        Number of result pages to fetch (default 3).

    Returns
    -------
    pandas.DataFrame
        Columns: title, sold_price (float or None), sold_date, source.
    """
    from urllib.parse import quote, quote_plus

    all_items = []
    for page in range(1, pages + 1):
        target = (
            "https://www.ebay.com/sch/i.html"
            f"?_nkw={quote_plus(query)}&LH_Complete=1&LH_Sold=1&_pgn={page}"
        )
        # BUG FIX: the target URL must be percent-encoded when embedded as the
        # `url` parameter. Unencoded, its "&" separators are parsed as extra
        # ScraperAPI parameters and the LH_Complete/LH_Sold filters are
        # silently dropped — you'd scrape *active* listings, not sold ones.
        url = f"{SCRAPER_API}?api_key={API_KEY}&url={quote(target, safe='')}"
        response = requests.get(url, timeout=60)
        response.raise_for_status()  # fail loudly on blocks / bad API key
        soup = BeautifulSoup(response.text, "html.parser")
        for item in soup.select(".s-item"):
            title_el = item.select_one(".s-item__title")
            price_el = item.select_one(".s-item__price")
            date_el = item.select_one(".s-item__ended-date, .s-item__endedDate")
            if title_el and price_el:
                title = title_el.text.strip()
                # eBay injects a "Shop on eBay" placeholder card as the first
                # .s-item on every page; it is not a real result.
                if title == "Shop on eBay":
                    continue
                all_items.append({
                    "title": title,
                    "sold_price": parse_price(price_el.text),
                    "sold_date": date_el.text.strip() if date_el else None,
                    "source": "ebay",
                })
    return pd.DataFrame(all_items)
def parse_price(text):
    """Extract a numeric price from text like "$1,234.56".

    Returns the first number found as a float, or None when the text
    contains no digits (e.g. "See price" / auction placeholders).
    """
    import re
    # Strip thousands separators first so "1,234.56" matches as one number;
    # the regex then only needs digits and an optional decimal part.
    match = re.search(r"\d+\.?\d*", text.replace(",", ""))
    return float(match.group()) if match else None
Scraping Sotheby's Auction Results
def scrape_sothebys_results(category="watches", limit=50):
    """Scrape auction results from Sotheby's.

    Parameters
    ----------
    category : str, optional
        Search query for the results page (default "watches").
    limit : int, optional
        Maximum number of lots to return (default 50).

    Returns
    -------
    pandas.DataFrame
        Columns: title, estimate, hammer_price (float or None), source.
    """
    from urllib.parse import quote, quote_plus

    target = f"https://www.sothebys.com/en/results?query={quote_plus(category)}"
    # BUG FIX: percent-encode the target so its query string survives being
    # embedded as ScraperAPI's `url` parameter; &render=true enables JS
    # rendering, which this page requires.
    url = f"{SCRAPER_API}?api_key={API_KEY}&url={quote(target, safe='')}&render=true"
    response = requests.get(url, timeout=90)  # rendered requests are slower
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    lots = []
    for lot in soup.select(".LotCard, [data-testid=lot-card]"):
        title = lot.select_one(".lot-title, h3")
        estimate = lot.select_one(".estimate")
        hammer = lot.select_one(".hammer-price, .sold-price")
        if title:
            lots.append({
                "title": title.text.strip(),
                "estimate": estimate.text.strip() if estimate else None,
                "hammer_price": parse_price(hammer.text) if hammer else None,
                "source": "sothebys",
            })
    return pd.DataFrame(lots[:limit])
Christie's Results
def scrape_christies_results(search_term):
    """Scrape auction results from Christie's.

    Parameters
    ----------
    search_term : str
        Keyword to search for; percent-encoded before being placed in the URL.

    Returns
    -------
    pandas.DataFrame
        Columns: title, hammer_price (float or None), sale, source.
    """
    from urllib.parse import quote, quote_plus

    # NOTE(review): "saession=sold" looks like a typo in the upstream URL
    # parameter — verify against the live site before relying on it.
    target = (
        "https://www.christies.com/en/results"
        f"?keyword={quote_plus(search_term)}&isautosuggestclick=false&saession=sold"
    )
    # BUG FIX: without encoding, the target's "&isautosuggestclick=..." and
    # "&saession=..." pieces are consumed as ScraperAPI parameters and never
    # reach Christie's at all.
    url = f"{SCRAPER_API}?api_key={API_KEY}&url={quote(target, safe='')}&render=true"
    response = requests.get(url, timeout=90)  # rendered requests are slower
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    results = []
    for lot in soup.select(".lot-container, .chr-lot"):
        title = lot.select_one(".lot-title, .chr-lot__title")
        price = lot.select_one(".price-realised, .chr-lot__price")
        sale = lot.select_one(".sale-title, .chr-lot__sale")
        if title and price:
            results.append({
                "title": title.text.strip(),
                "hammer_price": parse_price(price.text),
                "sale": sale.text.strip() if sale else None,
                "source": "christies",
            })
    return pd.DataFrame(results)
Cross-Platform Analysis
def price_analysis(query):
    """Aggregate and analyze auction prices for *query* across platforms.

    Fetches eBay, Sotheby's, and Christie's results, normalizes them into a
    single ``price`` column, prints summary statistics, and returns the
    combined DataFrame.

    Parameters
    ----------
    query : str
        Search term passed to each platform scraper.

    Returns
    -------
    pandas.DataFrame
        Combined results with a normalized ``price`` column.
    """
    ebay = scrape_ebay_sold(query)
    sothebys = scrape_sothebys_results(query)
    christies = scrape_christies_results(query)
    all_data = pd.concat([ebay, sothebys, christies], ignore_index=True)

    # Robustness: a platform may have returned an empty frame, in which case
    # its price column is missing entirely and dropna would raise KeyError.
    for col in ("sold_price", "hammer_price"):
        if col not in all_data.columns:
            all_data[col] = None
    all_data = all_data.dropna(subset=["sold_price", "hammer_price"], how="all")

    # Normalize: eBay rows carry sold_price, auction houses carry hammer_price.
    all_data["price"] = all_data["sold_price"].fillna(all_data["hammer_price"])

    print(f"\n📊 Price Analysis for {query}")
    print(f"Total results: {len(all_data)}")
    # BUG FIX: the original indexed with the bare name `price` (NameError);
    # the column key must be the string "price".
    print(f"Mean price: ${all_data['price'].mean():,.2f}")
    print(f"Median price: ${all_data['price'].median():,.2f}")
    print(f"Price range: ${all_data['price'].min():,.2f} - ${all_data['price'].max():,.2f}")
    print(f"\nBy platform:")
    print(all_data.groupby("source")["price"].describe())
    return all_data
Proxy Strategy
Auction sites employ sophisticated bot detection. Layer your approach:
- ScraperAPI for general scraping with auto-retry
- ThorData residential proxies for location-specific pricing
- ScrapeOps for monitoring scraper health and success rates
Legal Considerations
- eBay completed listings are publicly accessible
- Auction results are generally public records
- Respect rate limits and Terms of Service
- Don't scrape personal bidder information
Conclusion
Auction data scraping gives you pricing intelligence that's impossible to get any other way. Start with eBay for volume, then add premium houses for high-value categories. The combination gives you the most complete picture of what things actually sell for.
Top comments (0)