How to Scrape Motorsport Data: F1, NASCAR, and MotoGP Stats

#python #tutorial #webdev #programming

How to Scrape Motorsport Data: F1, NASCAR, and MotoGP Stats

Motorsport data is rich with statistics — lap times, driver standings, pit strategies, and historical records. Let's build scrapers for the three biggest racing series.

Data Sources

- Ergast API — Free F1 data going back to 1950
- NASCAR.com — Live and historical results
- MotoGP.com — Race results and rider stats

Setting Up

pip install requests beautifulsoup4 pandas matplotlib

Formula 1: Ergast API

The Ergast API is completely free with no authentication required:

import requests
import pandas as pd

# Base URL of the Ergast F1 API; every F1 endpoint path used below is appended to it.
# NOTE(review): I believe the ergast.com service has been retired in favour of an
# Ergast-compatible successor — confirm this host still responds before relying on it.
ERGAST_BASE = "https://ergast.com/api/f1"

def get_f1_standings(season="current"):
    """Fetch the driver championship standings for one F1 season.

    Args:
        season: Season to query, e.g. ``"2025"``. The value is placed in the
            URL path unchanged; the default is ``"current"``.

    Returns:
        A DataFrame with one row per driver and the columns ``position``,
        ``driver``, ``nationality``, ``team``, ``points`` and ``wins``.
        The frame is empty (but keeps those columns) when the API returns
        no standings list for the season.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    columns = ["position", "driver", "nationality", "team", "points", "wins"]
    url = f"{ERGAST_BASE}/{season}/driverStandings.json"
    # requests applies no timeout by default, so a stalled server would
    # otherwise block this call forever.
    resp = requests.get(url, timeout=30)
    # Surface HTTP errors directly instead of failing later while parsing
    # an error page as JSON.
    resp.raise_for_status()
    standings_lists = resp.json()["MRData"]["StandingsTable"]["StandingsLists"]
    if not standings_lists:
        # Nothing published for this season: return an empty table rather
        # than crashing with an IndexError on the [0] lookup.
        return pd.DataFrame(columns=columns)

    standings = []
    for item in standings_lists[0]["DriverStandings"]:
        driver = item["Driver"]
        standings.append({
            "position": int(item["position"]),
            "driver": f"{driver['givenName']} {driver['familyName']}",
            "nationality": driver["nationality"],
            # "Constructors" is a list; only its first entry is reported.
            "team": item["Constructors"][0]["name"],
            "points": float(item["points"]),
            "wins": int(item["wins"]),
        })
    return pd.DataFrame(standings, columns=columns)

# Fetch the 2025 driver standings and print the table without the index column.
# The `df` name is read again by the plotting snippet later in the article.
df = get_f1_standings("2025")
print(df.to_string(index=False))

F1 Race Results

def get_race_results(season, round_num):
    """Fetch the classification of a single F1 race.

    Args:
        season: Season of the race, e.g. ``"2025"``; placed in the URL path.
        round_num: Round number within the season; placed in the URL path.

    Returns:
        A ``(race_name, results)`` tuple where ``race_name`` is the race's
        ``raceName`` field and ``results`` is a DataFrame with the columns
        ``position``, ``driver``, ``team``, ``grid``, ``status`` and ``time``.
        ``position`` and ``grid`` are kept exactly as the API delivers them.
        ``time`` is ``"DNF"`` for every entry that has no ``Time`` field.

    Raises:
        IndexError: If the API returns no race for the given season/round.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    url = f"{ERGAST_BASE}/{season}/{round_num}/results.json"
    # Result rows are paginated; ask for enough rows to cover a full grid so
    # large fields are not cut off. The timeout prevents an indefinite hang.
    resp = requests.get(url, params={"limit": 100}, timeout=30)
    resp.raise_for_status()
    races = resp.json()["MRData"]["RaceTable"]["Races"]
    if not races:
        # Same exception type the bare [0] lookup used to raise, but with a
        # message that says which race was missing.
        raise IndexError(
            f"no race results for season {season!r}, round {round_num!r}"
        )

    race = races[0]
    results = []
    for r in race["Results"]:
        results.append({
            "position": r["position"],
            "driver": f"{r['Driver']['givenName']} {r['Driver']['familyName']}",
            "team": r["Constructor"]["name"],
            "grid": r["grid"],
            "status": r["status"],
            "time": r.get("Time", {}).get("time", "DNF"),
        })
    return race["raceName"], pd.DataFrame(results)

# Fetch round 1 of the 2025 season, then print the race name followed by the
# first ten rows of its result table.
name, results = get_race_results("2025", 1)
print(f"\n{name}:")
print(results.head(10).to_string(index=False))

Historical Win Analysis

def get_all_winners(start_year=2000, end_year=2025, delay=0.25):
    """Collect the winner of every F1 race in a range of seasons.

    One request is made per season, asking for the first-place result of
    each race in that season.

    Args:
        start_year: First season to include.
        end_year: Last season to include (inclusive).
        delay: Seconds to wait between consecutive season requests, to stay
            polite to the API. Use ``0`` to disable the pause.

    Returns:
        A DataFrame with one row per race and the columns ``year``, ``race``,
        ``driver`` and ``team``. The columns are present even when no race
        was found.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    import time  # local import: not part of the imports shown for this file

    columns = ["year", "race", "driver", "team"]
    all_winners = []
    # A single session reuses the HTTP connection across the season requests.
    with requests.Session() as session:
        for index, year in enumerate(range(start_year, end_year + 1)):
            if index and delay > 0:
                time.sleep(delay)
            resp = session.get(
                f"{ERGAST_BASE}/{year}/results/1.json",
                params={"limit": 50},
                timeout=30,  # never wait forever on a stalled server
            )
            # Stop on an HTTP error instead of parsing an error page.
            resp.raise_for_status()
            for race in resp.json()["MRData"]["RaceTable"]["Races"]:
                winner = race["Results"][0]
                all_winners.append({
                    "year": year,
                    "race": race["raceName"],
                    "driver": (
                        f"{winner['Driver']['givenName']} "
                        f"{winner['Driver']['familyName']}"
                    ),
                    "team": winner["Constructor"]["name"],
                })
    # Explicit columns keep lookups such as winners["driver"] working when
    # the result is empty.
    return pd.DataFrame(all_winners, columns=columns)

# Collect every race winner from 2015 through 2025, then print the ten drivers
# with the most wins (value_counts orders the counts from highest to lowest).
winners = get_all_winners(2015, 2025)
print("\nMost wins 2015-2025:")
print(winners["driver"].value_counts().head(10))

Scraping NASCAR Results

NASCAR doesn't have a public API, so we fetch the JavaScript-rendered results page through ScraperAPI and parse it with BeautifulSoup:

from bs4 import BeautifulSoup

def scrape_nascar_results(year=2025, api_key=None):
    """Scrape NASCAR Cup Series race winners for one season via ScraperAPI.

    The results page is requested through ScraperAPI with rendering enabled
    and the returned HTML is parsed with BeautifulSoup.

    Args:
        year: Season whose results page is requested.
        api_key: ScraperAPI key. When omitted, the ``SCRAPERAPI_KEY``
            environment variable is used, falling back to the original
            ``"YOUR_SCRAPERAPI_KEY"`` placeholder.

    Returns:
        A list of ``{"race": ..., "winner": ...}`` dicts, one per result row
        that contains both a race name and a winner element.

    Raises:
        requests.HTTPError: If ScraperAPI answers with a 4xx/5xx status
            (for example because the key is invalid).
        requests.Timeout: If no response arrives within the timeout.
    """
    import os  # local import: not part of the imports shown for this file

    if api_key is None:
        # Prefer the environment so a real key never has to be committed.
        api_key = os.environ.get("SCRAPERAPI_KEY", "YOUR_SCRAPERAPI_KEY")

    params = {
        "api_key": api_key,
        "url": f"https://www.nascar.com/results/{year}/cup-series/",
        "render": "true",
    }
    # Rendered pages fetched through a proxy can be slow, so the timeout is
    # generous, but it still prevents an indefinite hang.
    resp = requests.get("https://api.scraperapi.com", params=params, timeout=70)
    # Without this check an error page would be parsed and silently yield [].
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    races = []
    # NOTE(review): these CSS class names are assumed to match the rendered
    # page markup — verify them against the live site.
    for row in soup.select(".race-results-row"):
        race_name = row.select_one(".race-name")
        winner = row.select_one(".winner-name")
        # Skip rows that lack either element.
        if race_name is not None and winner is not None:
            races.append({
                "race": race_name.get_text(strip=True),
                "winner": winner.get_text(strip=True),
            })
    return races

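This article does not include a MotoGP scraper, but the same fetch-and-parse approach applies to MotoGP.com and other sites. Residential proxies such as ThorData can make access more reliable, and a monitoring service such as ScrapeOps can track scraper health.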

Visualization

import matplotlib.pyplot as plt

# Horizontal bar chart of the first five rows of the standings frame `df`,
# saved to f1_standings.png.
fig, ax = plt.subplots(figsize=(12, 6))
top_five = df.head(5)
# Read each driver's points from the same row instead of filtering `df` by
# name again for every bar: the extra lookup was redundant and would pick the
# wrong row if two drivers shared a display name.
# One barh call per driver keeps a distinct colour for each bar.
for driver, points in zip(top_five["driver"], top_five["points"]):
    ax.barh(driver, points)
ax.set_xlabel("Points")
ax.set_title("F1 Championship Standings")
plt.tight_layout()
plt.savefig("f1_standings.png")

Key Takeaways

- The Ergast API provides free, structured F1 data back to 1950
- NASCAR and MotoGP require web scraping with JS rendering
- Historical analysis reveals dominance patterns and era shifts
- Combining series data enables cross-motorsport comparisons

The Ergast API is a free community resource. For other sites, respect robots.txt and rate limits.

DEV Community

How to Scrape Motorsport Data: F1, NASCAR, and MotoGP Stats