Sports data is big business. From fantasy sports to betting analytics, there's massive demand for real-time statistics, historical data, and odds comparison.
Scraping Sports Statistics
import requests
from bs4 import BeautifulSoup
class SportsStats:
    """Scrapes NBA Eastern Conference standings from basketball-reference.com."""

    def __init__(self):
        # One persistent session reuses the TCP connection and carries the
        # browser-like User-Agent on every request.
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})

    def scrape_standings(self):
        """Fetch and parse the Eastern Conference standings table.

        Returns:
            list[dict]: {'team', 'wins', 'losses'} per team; all values are
            strings exactly as they appear on the page.

        Raises:
            requests.HTTPError: on a non-2xx response.
            requests.Timeout: if the server stalls past the timeout.
        """
        url = 'https://www.basketball-reference.com/leagues/NBA_2026_standings.html'
        # Fix: the original call had no timeout, so a stalled server would
        # block forever; it also never checked the status code, so a 403/500
        # error page would be parsed as if it were standings.
        resp = self.session.get(url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')
        return self._parse_standings(soup)

    def _parse_standings(self, soup):
        """Pure transform: parsed page -> list of standings rows.

        Returns an empty list when the expected table id is missing (page
        layout changed or the request was blocked) rather than crashing.
        """
        standings = []
        table = soup.select_one('#confs_standings_E')
        if table is None:
            return standings
        for row in table.select('tbody tr.full_table'):
            cols = row.select('td')
            team = row.select_one('th a')
            # NOTE(review): the >= 3 guard is stricter than the two columns
            # actually read — kept as-is to preserve original row filtering.
            if team and len(cols) >= 3:
                standings.append({
                    'team': team.get_text(strip=True),
                    'wins': cols[0].get_text(strip=True),
                    'losses': cols[1].get_text(strip=True)
                })
        return standings
Live Score Tracking
from datetime import datetime
import time
class LiveScores:
    """Fetches today's NBA scores from TheSportsDB's free JSON API."""

    def get_scores(self):
        """Return a list of score dicts for today's NBA games.

        Returns:
            list[dict]: {'home', 'away', 'home_score', 'away_score', 'status'}
            per event; missing fields come back as None.

        Raises:
            requests.HTTPError: on a non-2xx response.
            requests.Timeout: if the server stalls past the timeout.
        """
        url = 'https://www.thesportsdb.com/api/v1/json/3/eventsday.php'
        # Fix: added timeout (original could hang forever) and a status check
        # (original would try to .json() an error body).
        resp = requests.get(url, params={
            'd': datetime.now().strftime('%Y-%m-%d'), 's': 'NBA'
        }, timeout=10)
        resp.raise_for_status()
        return self._parse_events(resp.json())

    def _parse_events(self, payload):
        """Pure transform: API payload dict -> list of flat score dicts.

        The API returns {"events": null} when no games are scheduled, so the
        `or []` guard is required in addition to the missing-key default.
        """
        events = payload.get('events') or []
        return [{
            'home': e.get('strHomeTeam'),
            'away': e.get('strAwayTeam'),
            'home_score': e.get('intHomeScore'),
            'away_score': e.get('intAwayScore'),
            'status': e.get('strStatus')
        } for e in events]

    def monitor(self, interval=30):
        """Print all of today's scores every `interval` seconds until interrupted."""
        while True:
            for g in self.get_scores():
                print(f"{g['home']} {g['home_score']} - {g['away_score']} {g['away']}")
            time.sleep(interval)
Odds Comparison
class OddsScraper:
    """Pulls head-to-head (moneyline) odds from the-odds-api.com v4."""

    def get_odds(self, api_key, sport='basketball_nba'):
        """Return flattened odds for every upcoming game of `sport`.

        Args:
            api_key: the-odds-api.com API key.
            sport: sport key as defined by the API.

        Returns:
            list[dict]: {'home', 'away', 'odds'} where 'odds' maps
            "<bookmaker>_<team>" to an American-format price.

        Raises:
            requests.HTTPError: on a non-2xx response (e.g. invalid key).
            requests.Timeout: if the server stalls past the timeout.
        """
        url = f'https://api.the-odds-api.com/v4/sports/{sport}/odds'
        # Fix: added timeout and status check — the original hung on stalls
        # and fed error bodies to the JSON parser.
        resp = requests.get(url, params={
            'apiKey': api_key, 'regions': 'us',
            'markets': 'h2h,spreads', 'oddsFormat': 'american'
        }, timeout=10)
        resp.raise_for_status()
        return [self._parse_game(g) for g in resp.json()]

    def _parse_game(self, g):
        """Pure transform: one API game dict -> flat record with h2h prices.

        Fix: the original indexed bk['markets'] directly, so a bookmaker
        entry without a 'markets' key raised KeyError; .get() tolerates it.
        """
        odds = {}
        for bk in g.get('bookmakers', []):
            for mkt in bk.get('markets', []):
                # Only the h2h (moneyline) market is flattened; spreads are
                # requested but not yet consumed.
                if mkt['key'] == 'h2h':
                    for o in mkt['outcomes']:
                        odds[f"{bk['title']}_{o['name']}"] = o['price']
        return {'home': g['home_team'], 'away': g['away_team'], 'odds': odds}
Data Storage
import sqlite3
class SportsDB:
    """Thin SQLite wrapper for storing final game results."""

    def __init__(self, path='sports.db'):
        """Open (or create) the database at `path` and ensure the games table exists."""
        self.conn = sqlite3.connect(path)
        self.conn.execute('''CREATE TABLE IF NOT EXISTS games (
            id INTEGER PRIMARY KEY, sport TEXT, home TEXT, away TEXT,
            home_score INTEGER, away_score INTEGER, game_date DATE)''')

    def save(self, game):
        """Insert one game record and commit.

        Args:
            game: dict with keys 'sport', 'home', 'away', 'home_score',
                'away_score', 'date'. Raises KeyError if any is missing.
        """
        # Parameterized placeholders keep scraped team names from being
        # interpreted as SQL.
        self.conn.execute(
            'INSERT INTO games (sport, home, away, home_score, away_score, game_date) VALUES (?,?,?,?,?,?)',
            (game['sport'], game['home'], game['away'],
             game['home_score'], game['away_score'], game['date']))
        self.conn.commit()

    def close(self):
        """Release the SQLite connection (fix: the original never closed it)."""
        self.conn.close()

    # Context-manager support so callers can write `with SportsDB(...) as db:`
    # and the connection is closed even on error.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False  # never suppress exceptions
Scaling
ScraperAPI handles proxy rotation for stats sites. ThorData provides fast residential proxies, and ScrapeOps monitors pipeline health.
Conclusion
Build a solid database layer, use official APIs where they exist, and fall back to scraping only for data the APIs don't cover.
Top comments (0)