Steam is the world's largest PC gaming platform with over 70,000 games, millions of user reviews, and real-time player data. Whether you're building a game analytics dashboard, tracking pricing trends, or researching the indie game market, scraping Steam gives you access to data no other source provides.
In this guide, I'll walk you through scraping Steam game data, reviews, pricing, and player statistics using Python.
What Data Can You Get from Steam?
Steam's public pages and unofficial APIs expose a wealth of data:
- Game info — title, description, genres, tags, release date, developer, publisher
- Pricing — current price, discounts, historical lowest price, regional pricing
- Reviews — user review text, rating, playtime, helpfulness votes
- Player data — current players, peak players, player count history
- Store data — top sellers, new releases, trending games, wishlisted games
Setting Up
pip install requests beautifulsoup4 lxml
Using Steam's Unofficial API
Steam actually has several public JSON endpoints that don't require authentication. These are the easiest way to get structured data:
import requests
import time
import json
def get_app_details(app_id):
    """Fetch structured store metadata for a single Steam app.

    Args:
        app_id: Numeric Steam application id (e.g. 730 for Counter-Strike 2).

    Returns:
        A dict of normalized game fields, or None when the app id is
        unknown or the endpoint returns a null/empty body (which Steam
        does when rate-limiting this unauthenticated API).
    """
    url = f"https://store.steampowered.com/api/appdetails?appids={app_id}"
    response = requests.get(url, timeout=10)
    data = response.json()
    # When rate-limited, Steam returns a JSON `null` body; indexing it
    # directly would raise TypeError, so guard before subscripting.
    entry = (data or {}).get(str(app_id))
    if not entry or not entry.get("success"):
        return None
    game = entry["data"]
    # price_overview is absent entirely for free games.
    price = game.get("price_overview", {})
    return {
        "app_id": app_id,
        "name": game.get("name"),
        "type": game.get("type"),
        "is_free": game.get("is_free"),
        "description": game.get("short_description"),
        "developers": game.get("developers", []),
        "publishers": game.get("publishers", []),
        "genres": [g["description"] for g in game.get("genres", [])],
        "categories": [c["description"] for c in game.get("categories", [])],
        "release_date": game.get("release_date", {}).get("date"),
        "price": price.get("final_formatted"),
        "discount": price.get("discount_percent", 0),
        "metacritic": game.get("metacritic", {}).get("score"),
        "recommendations": game.get("recommendations", {}).get("total"),
        "platforms": game.get("platforms", {}),
    }
# Example: Get details for Counter-Strike 2
game = get_app_details(730)  # 730 is CS2's app id
print(json.dumps(game, indent=2))
Scraping Player Count Data
Steam provides real-time player counts through its API:
def get_player_count(app_id):
    """Return the current concurrent player count for an app, or None.

    Uses the official GetNumberOfCurrentPlayers Web API endpoint.
    A `result` value of 1 signals success; anything else (unknown app
    id, transient error) yields None.
    """
    url = f"https://api.steampowered.com/ISteamUserStats/GetNumberOfCurrentPlayers/v1/?appid={app_id}"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=10)
    data = response.json()
    if data.get("response", {}).get("result") == 1:
        return data["response"]["player_count"]
    return None
def track_player_counts(app_ids, interval=300, cycles=None):
    """Poll player counts periodically and persist them to player_counts.json.

    Args:
        app_ids: List of Steam app ids to monitor.
        interval: Seconds to wait between polling rounds.
        cycles: Optional number of polling rounds; None (default) loops
            forever, preserving the original behavior.

    Returns:
        The accumulated history dict: {app_id: [{"timestamp", "players"}, ...]}.
    """
    from datetime import datetime

    history = {app_id: [] for app_id in app_ids}
    rounds = 0
    while cycles is None or rounds < cycles:
        timestamp = datetime.now().isoformat()
        for app_id in app_ids:
            count = get_player_count(app_id)
            if count is not None:
                history[app_id].append({
                    "timestamp": timestamp,
                    "players": count,
                })
                # Only format inside the guard: f"{None:,}" raises TypeError,
                # which the original hit whenever a lookup failed.
                print(f"App {app_id}: {count:,} players")
            else:
                print(f"App {app_id}: player count unavailable")
            time.sleep(1)  # brief pause between per-app API calls
        # Persist after every round so a crash loses at most one interval.
        with open("player_counts.json", "w") as f:
            json.dump(history, f, indent=2)
        rounds += 1
        time.sleep(interval)
    return history
# Track popular games: 730=CS2, 570=Dota 2, 440=TF2
# NOTE: this call loops indefinitely (the function never returns);
# stop it with Ctrl+C. Collected data is written to player_counts.json.
track_player_counts([730, 570, 440])
Scraping User Reviews
Steam reviews are available through a dedicated API endpoint:
def get_reviews(app_id, num_reviews=100):
    """Fetch up to num_reviews recent English reviews for a Steam app.

    Pages through the appreviews endpoint using Steam's cursor-based
    pagination and returns a list of flattened review dicts.

    Args:
        app_id: Numeric Steam application id.
        num_reviews: Maximum number of reviews to collect.
    """
    from urllib.parse import quote

    reviews = []
    cursor = "*"
    while len(reviews) < num_reviews:
        # Cursors contain characters like '+', '/', '=' that must be
        # URL-encoded; an unencoded '+' is decoded server-side as a space
        # and Steam then returns the same first page repeatedly.
        url = (
            f"https://store.steampowered.com/appreviews/{app_id}"
            f"?json=1&num_per_page=100&cursor={quote(cursor)}"
            f"&filter=recent&language=english"
        )
        response = requests.get(url, timeout=10)
        data = response.json()
        if not data.get("success") or not data.get("reviews"):
            break
        for review in data["reviews"]:
            reviews.append({
                "review_id": review["recommendationid"],
                "author_id": review["author"]["steamid"],
                "author_playtime": review["author"]["playtime_forever"],
                "voted_up": review["voted_up"],
                "text": review["review"],
                "timestamp": review["timestamp_created"],
                "votes_up": review["votes_up"],
                "votes_funny": review["votes_funny"],
                "weighted_score": review["weighted_vote_score"],
            })
        cursor = data.get("cursor", "")
        if not cursor:
            break
        time.sleep(2)  # stay under the review API's rate limit
    return reviews[:num_reviews]
# Get recent reviews for Elden Ring (app_id: 1245620)
reviews = get_reviews(1245620, num_reviews=50)
for review in reviews[:3]:
    if review["voted_up"]:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    # playtime_forever is reported in minutes; show whole hours
    hours = review["author_playtime"] // 60
    print(f"[{sentiment}] ({hours}h played): {review['text'][:100]}...")
Scraping the Steam Store Pages
For data not available through the API (like tag-based browsing or sale pages), you can scrape the HTML:
from bs4 import BeautifulSoup
def scrape_top_sellers():
    """Scrape the first page of Steam's top-sellers search listing.

    Returns:
        A list of dicts with app_id, title, price, discount and
        release_date. app_id is None for rows without a data-ds-appid
        attribute (e.g. bundles/packages).
    """
    url = "https://store.steampowered.com/search/?filter=topsellers"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36",
        # Pre-set age-gate cookies so mature titles don't redirect.
        "Cookie": "birthtime=0; wants_mature_content=1;"
    }
    # timeout prevents an unresponsive server from hanging the scraper
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, "lxml")
    games = []
    for row in soup.select("a.search_result_row"):
        title = row.select_one("span.title")
        price = row.select_one("div.discount_final_price")
        discount = row.select_one("div.discount_pct")
        release = row.select_one("div.search_released")
        games.append({
            "app_id": row.get("data-ds-appid"),
            "title": title.text.strip() if title else None,
            # No price node usually means a free title; "Free" mirrors
            # the original behavior — verify against non-game rows.
            "price": price.text.strip() if price else "Free",
            "discount": discount.text.strip() if discount else "0%",
            "release_date": release.text.strip() if release else None,
        })
    return games
# Print a quick summary of the ten best-selling titles.
top = scrape_top_sellers()
for entry in top[:10]:
    print(f"{entry['title']} — {entry['price']} ({entry['discount']})")
Scraping Regional Pricing
Compare prices across regions to find the cheapest store:
def get_regional_prices(app_id, country_codes=None):
    """Fetch the store price of one app across several country codes.

    Args:
        app_id: Numeric Steam application id.
        country_codes: ISO country codes to query; defaults to a spread
            of cheap and expensive regions.

    Returns:
        {country_code: {"currency", "price", "discount"}} for every
        region where a price_overview was returned.
    """
    if country_codes is None:
        country_codes = ["US", "GB", "BR", "TR", "AR", "IN", "RU"]
    prices = {}
    for cc in country_codes:
        url = f"https://store.steampowered.com/api/appdetails?appids={app_id}&cc={cc}"
        response = requests.get(url, timeout=10)
        data = response.json()
        # Steam returns a null body when rate-limiting; guard before
        # subscripting (the original raised TypeError/KeyError here).
        entry = (data or {}).get(str(app_id), {})
        if entry.get("success"):
            price_info = entry["data"].get("price_overview")
            if price_info:
                prices[cc] = {
                    "currency": price_info["currency"],
                    "price": price_info["final_formatted"],
                    "discount": price_info["discount_percent"],
                }
        time.sleep(1)  # one request per second keeps us under the limit
    return prices
# Compare Elden Ring's price across the default set of regions.
prices = get_regional_prices(1245620)  # Elden Ring
for cc, details in prices.items():
    print(f"{cc}: {details['price']} ({details['currency']})")
Handling Rate Limits and Blocks
Steam is relatively permissive but has limits:
- Store API — roughly 200 requests per 5 minutes per IP
- Review API — around 100 requests per 5 minutes
- HTML pages — more aggressive rate limiting, especially during sales
Using Proxies for Scale
When you need to scrape thousands of games, rotating proxies keep you from hitting rate limits. ScrapeOps provides a proxy API designed for web scraping:
SCRAPEOPS_KEY = "YOUR_KEY"  # replace with your ScrapeOps API key
# The target page is passed to the proxy endpoint as a query parameter;
# ScrapeOps fetches it on your behalf through its proxy pool.
url = f"https://proxy.scrapeops.io/v1/?api_key={SCRAPEOPS_KEY}&url=https://store.steampowered.com/app/730"
response = requests.get(url)
For residential proxies that handle geo-targeted requests (useful for regional pricing), ThorData works well:
# Route both HTTP and HTTPS traffic through the same residential proxy;
# credentials are embedded in the proxy URL (user:pass@host:port).
proxies = {
    "http": "http://user:pass@proxy.thordata.com:9000",
    "https": "http://user:pass@proxy.thordata.com:9000",
}
response = requests.get(url, proxies=proxies)
The Easy Way: Pre-Built Steam Scraper
Building and maintaining a Steam scraper takes ongoing effort — APIs change, rate limits shift, and edge cases pile up. If you want structured Steam data without the maintenance, there's a ready-to-use Steam Scraper on Apify that handles everything automatically.
It returns clean JSON for any game, including pricing, reviews, tags, and player data:
{
"name": "Elden Ring",
"app_id": 1245620,
"price": "$59.99",
"discount": "0%",
"genres": ["Action", "RPG"],
"developer": "FromSoftware Inc.",
"recent_reviews": "Very Positive",
"all_reviews": "Very Positive",
"current_players": 45231
}
No rate limit management, no proxy setup — just provide game IDs or search terms and get results.
Building a Game Deal Finder
Here's a practical script that finds the best current deals on Steam:
def find_best_deals(min_discount=50):
    """Find currently discounted games at or above a discount threshold.

    Args:
        min_discount: Minimum discount percentage (e.g. 50 for "-50%").

    Returns:
        A list of dicts: app_id, title, original_price, sale_price,
        discount (formatted as "-NN%").
    """
    url = (
        "https://store.steampowered.com/search/results/"
        "?query&start=0&count=50&sort_by=Reviews_DESC"
        "&specials=1&json=1"
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36",
    }
    response = requests.get(url, headers=headers, timeout=10)
    data = response.json()
    # The endpoint returns rendered result rows as HTML inside a JSON envelope.
    soup = BeautifulSoup(data.get("results_html", ""), "lxml")
    deals = []
    for row in soup.select("a.search_result_row"):
        title = row.select_one("span.title")
        discount = row.select_one("div.discount_pct")
        original = row.select_one("div.discount_original_price")
        final = row.select_one("div.discount_final_price")
        app_id = row.get("data-ds-appid")
        if not discount or not title:
            continue
        # Badge text looks like "-75%"; strip the sign and percent mark.
        raw = discount.text.strip().replace("-", "").replace("%", "")
        if not raw.isdigit():
            continue  # skip malformed badges instead of raising ValueError
        discount_val = int(raw)
        if discount_val >= min_discount:
            deals.append({
                "app_id": app_id,
                "title": title.text.strip(),
                "original_price": original.text.strip() if original else None,
                "sale_price": final.text.strip() if final else None,
                "discount": f"-{discount_val}%",
            })
    return deals
# Show the ten deepest-discounted results at 60%+ off.
deals = find_best_deals(min_discount=60)
for deal in deals[:10]:
    print(f"{deal['discount']} {deal['title']}: {deal['original_price']} -> {deal['sale_price']}")
Best Practices
- Use the API first — Steam's JSON endpoints are more reliable than HTML scraping
- Respect rate limits — 1-2 second delays between requests, max 200/5min
- Cache aggressively — game metadata rarely changes, cache it for 24h+
- Set the birthtime cookie — avoids age-check redirects for mature games
- Use proxies for scale — ScrapeOps or ThorData for high-volume scraping
- Handle regional differences — prices and availability vary by country
Wrapping Up
Steam is one of the most scraper-friendly platforms thanks to its public API endpoints. For small projects, the built-in APIs with requests are all you need. For production-scale data collection, use the Steam Scraper on Apify or pair your code with a proxy service for reliability.
All the code above works as of 2026 — start building and track the data that matters to your project.
Top comments (0)