Building a sports score API is a classic scraping project. Here's how to scrape live scores and serve them through a clean FastAPI endpoint.
Architecture
Three components: a scraper fetching live scores, a Redis cache for fast reads, and a FastAPI endpoint serving structured data.
Setting Up
import json
import threading
import time
from contextlib import asynccontextmanager
from datetime import datetime
from typing import Optional

import redis
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException
# ScraperAPI credential — placeholder; replace with a real key (ideally read
# from an environment variable rather than hard-coded).
SCRAPER_KEY = "YOUR_SCRAPERAPI_KEY"
# decode_responses=True makes redis-py return str instead of bytes, so the
# cache layer can pass values straight to json.loads.
r = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
def fetch(url):
    """Fetch *url* through the ScraperAPI proxy (with JS rendering) and parse it.

    Returns a BeautifulSoup document of the rendered page.
    Raises requests.HTTPError on a non-2xx proxy response.
    """
    resp = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": SCRAPER_KEY, "url": url, "render": "true"},
        timeout=15,
    )
    # Without this check, a 401/429/500 body from the proxy would be parsed
    # as if it were the scoreboard and silently yield zero games.
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")
Scraping Live Scores
def scrape_nba_scores():
    """Scrape the ESPN NBA scoreboard and return a list of game dicts.

    Each dict carries teams, scores, status, an ISO timestamp, and a
    synthetic game_id built from the first three letters of each team name.
    """
    soup = fetch("https://www.espn.com/nba/scoreboard")
    results = []
    for card in soup.find_all("section", class_="Scoreboard"):
        names = card.find_all("div", class_="ScoreCell__TeamName")
        if len(names) < 2:
            continue  # malformed card without two teams — skip it
        points = card.find_all("div", class_="ScoreCell__Score")
        clock = card.find("div", class_="ScoreCell__Time")
        away = names[0].text.strip()
        home = names[1].text.strip()
        # Pre-game cards have no numeric score yet; default both sides to 0.
        away_pts = int(points[0].text) if len(points) >= 1 and points[0].text.isdigit() else 0
        home_pts = int(points[1].text) if len(points) >= 2 and points[1].text.isdigit() else 0
        results.append({
            "sport": "NBA",
            "away_team": away,
            "home_team": home,
            "away_score": away_pts,
            "home_score": home_pts,
            "status": clock.text.strip() if clock else "Unknown",
            "updated_at": datetime.now().isoformat(),
            "game_id": f"nba_{away[:3]}_{home[:3]}",
        })
    return results
Redis Cache Layer
class ScoreCache:
    """Thin Redis-backed cache mapping game ids to JSON score payloads.

    Scores expire automatically (ttl), so stale games fall out of the API
    without any explicit cleanup.
    """

    def __init__(self, redis_client):
        self.r = redis_client
        self.ttl = 120  # seconds before a cached score expires

    def update_scores(self, games):
        """Write every game dict to Redis and index it under its sport set."""
        pipe = self.r.pipeline()
        for entry in games:
            sport_key = f"games:{entry['sport'].lower()}"
            pipe.set(f"score:{entry['game_id']}", json.dumps(entry), ex=self.ttl)
            pipe.sadd(sport_key, entry["game_id"])
            # Index outlives individual scores so dead ids resolve to None.
            pipe.expire(sport_key, self.ttl * 2)
        pipe.execute()

    def get_scores(self, sport=None):
        """Return cached games newest-first, optionally filtered by sport."""
        if sport is None:
            ids = set()
            for name in ["nba", "nfl", "mlb", "nhl"]:
                ids |= set(self.r.smembers(f"games:{name}"))
        else:
            ids = self.r.smembers(f"games:{sport.lower()}")
        found = []
        for gid in ids:
            raw = self.r.get(f"score:{gid}")
            if raw:
                found.append(json.loads(raw))
        found.sort(key=lambda g: g.get("updated_at", ""), reverse=True)
        return found

    def get_game(self, game_id):
        """Return one cached game dict, or None when absent or expired."""
        raw = self.r.get(f"score:{game_id}")
        return None if not raw else json.loads(raw)
cache = ScoreCache(r)
Background Scraper
def scraper_loop(interval=60):
    """Poll every registered scraper forever, pushing results into the cache.

    Designed to run in a daemon thread: a failure in one sport is logged and
    never stops the loop.
    """
    scrapers = [("NBA", scrape_nba_scores)]
    while True:
        for sport, scrape in scrapers:
            try:
                games = scrape()
                if games:
                    cache.update_scores(games)
                    print(f"Updated {len(games)} {sport} games")
            except Exception as e:
                print(f"Error {sport}: {e}")
        time.sleep(interval)
FastAPI Endpoints
@asynccontextmanager
async def lifespan(app):
    """Start the background scraper thread when the application boots."""
    worker = threading.Thread(target=scraper_loop, daemon=True)
    worker.start()
    # Daemon thread dies with the process, so no shutdown work is needed.
    yield

app = FastAPI(title="Live Sports Scores API", lifespan=lifespan)
@app.get("/api/scores")
def get_all_scores(sport: Optional[str] = None):
    """Return every cached score, optionally filtered by sport (e.g. "nba").

    `sport: str = None` is a type-incorrect annotation; Optional[str] is what
    FastAPI documents for optional query parameters.
    """
    scores = cache.get_scores(sport)
    return {"count": len(scores), "scores": scores}
@app.get("/api/scores/{game_id}")
def get_game_score(game_id: str):
    """Return a single game by id, or a real 404 when it is not cached.

    Raises:
        HTTPException: 404 when the game id is unknown or has expired.
    """
    game = cache.get_game(game_id)
    if not game:
        # Returning a `(dict, 404)` tuple is Flask behavior; FastAPI would
        # serialize the tuple as JSON with status 200. Raise instead.
        raise HTTPException(status_code=404, detail="Game not found")
    return game
@app.get("/api/live")
def get_live_games():
    """Return only games that are currently in progress (not finished)."""
    finished = {"Final", "Postponed", "Unknown"}
    all_scores = cache.get_scores()
    # .get() keeps a malformed cache entry (no "status" key) from raising
    # KeyError; a missing status is treated as "Unknown" and excluded,
    # matching how the scraper labels games it could not read a clock for.
    live = [g for g in all_scores if g.get("status", "Unknown") not in finished]
    return {"live_count": len(live), "games": live}
Running It
pip install fastapi uvicorn redis beautifulsoup4 requests
uvicorn scores_api:app --host 0.0.0.0
Scaling with Proxies
- ScraperAPI — JavaScript rendering for ESPN's dynamic scoreboards
- ThorData — High-uptime residential proxies for continuous scraping
- ScrapeOps — Monitor scraper health and alert on failures
Conclusion
A live sports score API combines real-time scraping with caching and clean API design. Start with one sport, validate against official scores, then expand to a multi-sport platform.
Top comments (0)