DEV Community

agenthustler
agenthustler

Posted on

How to Build a Real-Time Sports Score API with Scraping

Building a sports score API is a classic scraping project. Here's how to scrape live scores and serve them through a clean FastAPI endpoint.

Architecture

Three components: a scraper fetching live scores, a Redis cache for fast reads, and a FastAPI endpoint serving structured data.

Setting Up

import json
import threading
import time
from contextlib import asynccontextmanager
from datetime import datetime, timezone
from typing import Optional

import redis
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException

# ScraperAPI credential — NOTE(review): load from an environment variable
# before deploying; a literal key in source will leak via version control.
SCRAPER_KEY = "YOUR_SCRAPERAPI_KEY"
# decode_responses=True makes the client return str instead of bytes, so
# json.loads/json.dumps round-trip without manual decoding.
r = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)

def fetch(url):
    """Fetch *url* through the ScraperAPI proxy (JS rendering on) and
    return the parsed HTML.

    Args:
        url: Target page to scrape.

    Returns:
        BeautifulSoup tree of the rendered page.

    Raises:
        requests.HTTPError: if the proxy responds with a non-2xx status
            (bad key, quota exhausted, upstream failure).
        requests.Timeout: if the request exceeds the 15 s budget.
    """
    resp = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": SCRAPER_KEY, "url": url, "render": "true"},
        timeout=15,
    )
    # Bug fix: fail loudly on proxy/auth errors instead of silently
    # parsing an error page into an (empty-looking) soup.
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")
Enter fullscreen mode Exit fullscreen mode

Scraping Live Scores

def _parse_score(cells, index):
    """Return the integer score in cells[index], or 0 when the cell is
    missing or not yet numeric (e.g. before tip-off)."""
    if len(cells) > index:
        text = cells[index].text.strip()
        if text.isdigit():
            return int(text)
    return 0


def scrape_nba_scores():
    """Scrape ESPN's NBA scoreboard into a list of game dicts.

    Returns:
        list[dict]: one dict per game with keys sport, away_team,
        home_team, away_score, home_score, status, updated_at, game_id.
        Empty list when no games are found (or selectors stop matching).
    """
    soup = fetch("https://www.espn.com/nba/scoreboard")
    games = []

    # NOTE(review): these class names are ESPN-internal and can change
    # without notice — re-validate selectors when results come back empty.
    for game in soup.find_all("section", class_="Scoreboard"):
        teams = game.find_all("div", class_="ScoreCell__TeamName")
        scores = game.find_all("div", class_="ScoreCell__Score")
        status_el = game.find("div", class_="ScoreCell__Time")

        if len(teams) < 2:
            continue  # malformed card; skip rather than emit partial data

        game_data = {
            "sport": "NBA",
            "away_team": teams[0].text.strip(),  # ESPN lists the away team first
            "home_team": teams[1].text.strip(),
            "away_score": _parse_score(scores, 0),
            "home_score": _parse_score(scores, 1),
            "status": status_el.text.strip() if status_el else "Unknown",
            # Bug fix: timezone-aware UTC timestamp — the cache sorts on this
            # string, and naive local times break across hosts/DST changes.
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
        # Stable-ish id from team-name prefixes; collisions are possible for
        # teams sharing a 3-letter prefix — assumes at most one matchup per
        # pair per scrape window. TODO confirm against real schedules.
        game_data["game_id"] = f"nba_{game_data['away_team'][:3]}_{game_data['home_team'][:3]}"
        games.append(game_data)

    return games
Enter fullscreen mode Exit fullscreen mode

Redis Cache Layer

class ScoreCache:
    """Thin Redis-backed cache for live game scores.

    Each game is stored as JSON under ``score:<game_id>`` with a short
    TTL, and indexed per sport in a set under ``games:<sport>``.
    """

    def __init__(self, redis_client):
        self.r = redis_client
        self.ttl = 120  # seconds a score entry stays fresh

    def update_scores(self, games):
        """Write a batch of game dicts to Redis in one pipelined round trip."""
        pipe = self.r.pipeline()
        for game in games:
            sport_key = f"games:{game['sport'].lower()}"
            pipe.set(f"score:{game['game_id']}", json.dumps(game), ex=self.ttl)
            pipe.sadd(sport_key, game["game_id"])
            # The index outlives individual entries so stale ids age out
            # naturally instead of accumulating forever.
            pipe.expire(sport_key, self.ttl * 2)
        pipe.execute()

    def get_scores(self, sport=None):
        """Return cached games for one sport (or all sports), newest first."""
        if sport is None:
            ids = set()
            for league in ("nba", "nfl", "mlb", "nhl"):
                ids.update(self.r.smembers(f"games:{league}"))
        else:
            ids = self.r.smembers(f"games:{sport.lower()}")

        payloads = (self.r.get(f"score:{gid}") for gid in ids)
        found = [json.loads(raw) for raw in payloads if raw]
        found.sort(key=lambda g: g.get("updated_at", ""), reverse=True)
        return found

    def get_game(self, game_id):
        """Return one cached game dict, or None when absent/expired."""
        raw = self.r.get(f"score:{game_id}")
        return json.loads(raw) if raw else None

cache = ScoreCache(r)
Enter fullscreen mode Exit fullscreen mode

Background Scraper

def scraper_loop(interval=60):
    """Refresh every registered sport's scores into the cache, forever.

    Args:
        interval: seconds to sleep between full refresh passes.

    Intended to run in a daemon thread; a failure in any one source is
    logged and skipped so the loop never dies.
    """
    sources = [("NBA", scrape_nba_scores)]

    while True:
        for label, scrape in sources:
            try:
                results = scrape()
                if results:
                    cache.update_scores(results)
                    print(f"Updated {len(results)} {label} games")
            except Exception as exc:
                print(f"Error {label}: {exc}")
        time.sleep(interval)
Enter fullscreen mode Exit fullscreen mode

FastAPI Endpoints

@asynccontextmanager
async def lifespan(app):
    """Start the background scraper for the lifetime of the FastAPI app.

    The worker is a daemon thread, so it terminates with the process —
    nothing to join or signal on shutdown.
    """
    worker = threading.Thread(target=scraper_loop, daemon=True)
    worker.start()
    yield
app = FastAPI(title="Live Sports Scores API", lifespan=lifespan)

@app.get("/api/scores")
def get_all_scores(sport: Optional[str] = None):
    """List cached scores, optionally filtered by sport (e.g. ?sport=nba).

    Fix: `sport: str = None` is an implicit-Optional annotation (rejected
    by PEP 484 type checkers and deprecated by FastAPI); declare it
    `Optional[str]` so the query parameter is documented as optional.
    """
    scores = cache.get_scores(sport)
    return {"count": len(scores), "scores": scores}

@app.get("/api/scores/{game_id}")
def get_game_score(game_id: str):
    """Return a single cached game, or HTTP 404 when it is unknown/expired.

    Bug fix: FastAPI does not support Flask-style `return body, status`
    tuples — the original returned HTTP 200 with a two-element JSON array
    `[{"error": ...}, 404]`. Raise HTTPException for a real 404.
    """
    game = cache.get_game(game_id)
    if game is None:
        raise HTTPException(status_code=404, detail="Game not found")
    return game

@app.get("/api/live")
def get_live_games():
    """Return only the games whose status suggests play is ongoing."""
    # Statuses that mean a game is not currently in progress.
    inactive = {"Final", "Postponed", "Unknown"}
    in_progress = [game for game in cache.get_scores() if game["status"] not in inactive]
    return {"live_count": len(in_progress), "games": in_progress}

Running It

pip install fastapi uvicorn redis beautifulsoup4 requests
uvicorn scores_api:app --host 0.0.0.0
Enter fullscreen mode Exit fullscreen mode

Scaling with Proxies

  • ScraperAPI — JavaScript rendering for ESPN's dynamic scoreboards
  • ThorData — High-uptime residential proxies for continuous scraping
  • ScrapeOps — Monitor scraper health and alert on failures

Conclusion

A live sports score API combines real-time scraping with caching and clean API design. Start with one sport, validate against official scores, then expand to a multi-sport platform.

Top comments (0)