Earnings surprises drive massive stock movements. In this tutorial, I'll show you how to build a real-time earnings surprise tracker by scraping financial data sources.
Why Track Earnings Surprises?
When a company reports earnings that differ significantly from Wall Street estimates, the stock can move 5-20% in minutes. A live tracker helps you:
- Monitor earnings season in real time
- Identify sector outperformance patterns
- Build datasets for quantitative trading strategies
- Create alerts for significant beats or misses
Project Setup
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import json
import time
SCRAPER_KEY = "YOUR_SCRAPERAPI_KEY"
def scrape_page(url):
    """Fetch *url* through the ScraperAPI proxy and return parsed HTML.

    Args:
        url: The target page to scrape.

    Returns:
        A BeautifulSoup tree of the response body.

    Raises:
        requests.HTTPError: if the proxy returns a non-2xx status.
        requests.Timeout: if the request exceeds 30 seconds.
    """
    resp = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": SCRAPER_KEY, "url": url},
        timeout=30
    )
    # Without this, a 4xx/5xx error page (bad key, quota exhausted, blocked
    # target) would be parsed as if it were the earnings calendar and the
    # caller would silently see "no table" / empty results.
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")
Scraping the Earnings Calendar
def get_earnings_calendar(date_str):
    """Scrape the Yahoo Finance earnings calendar for one day.

    Args:
        date_str: Calendar day in ``YYYY-MM-DD`` form.

    Returns:
        A DataFrame with one row per reporting company (symbol, company,
        EPS/revenue estimate and actual, scrape timestamp); empty if the
        page has no earnings table.
    """
    calendar_url = f"https://finance.yahoo.com/calendar/earnings?day={date_str}"
    page = scrape_page(calendar_url)

    table = page.find("table")
    if not table:
        return pd.DataFrame()

    records = []
    # Skip the header row, then pull one record per data row.
    for data_row in table.find_all("tr")[1:]:
        cells = data_row.find_all("td")
        if len(cells) < 6:
            continue  # malformed / partial row — not a full earnings entry
        records.append({
            "symbol": cells[0].text.strip(),
            "company": cells[1].text.strip(),
            "eps_estimate": parse_currency(cells[2].text),
            "eps_actual": parse_currency(cells[3].text),
            "revenue_estimate": parse_large_number(cells[4].text),
            "revenue_actual": parse_large_number(cells[5].text),
            "scraped_at": datetime.now().isoformat()
        })
    return pd.DataFrame(records)
def parse_currency(text):
    """Convert a currency string like ``"$1,234.56"`` to a float.

    Returns None when the text is not numeric (e.g. the ``"-"``
    placeholder Yahoo shows before a company reports).
    """
    cleaned = text.strip().replace("$", "").replace(",", "")
    try:
        return float(cleaned)
    except ValueError:
        return None
def parse_large_number(text):
    """Parse a suffixed magnitude string (``"2.5B"``, ``"300M"``, ``"1.2K"``)
    or a plain number into a float.

    Returns None for anything unparseable (e.g. ``"-"`` placeholders or a
    bare/malformed suffix), matching parse_currency's failure convention.
    """
    cleaned = text.strip().replace(",", "")
    multipliers = {"B": 1e9, "M": 1e6, "K": 1e3}
    for suffix, mult in multipliers.items():
        if cleaned.endswith(suffix):
            # Guarded: previously a malformed value like a bare "B" raised
            # an uncaught ValueError here instead of returning None.
            try:
                return float(cleaned[:-1]) * mult
            except ValueError:
                return None
    try:
        return float(cleaned)
    except ValueError:
        return None
Calculating Surprise Metrics
def calculate_surprises(df):
    """Add EPS surprise metrics to an earnings DataFrame.

    Rows missing either the EPS estimate or actual are dropped. Adds:
      - ``eps_surprise_pct``: percent deviation of actual from estimate,
        rounded to 2 decimals (estimate sign-normalized via abs()).
      - ``surprise_category``: bucketed label from big_miss to big_beat.

    Args:
        df: DataFrame with ``eps_estimate`` and ``eps_actual`` columns.

    Returns:
        A new DataFrame sorted by ``eps_surprise_pct`` descending; the
        input frame is not modified.
    """
    # .copy() so the column assignments below target a real frame, not a
    # view of the caller's data (avoids pandas SettingWithCopyWarning and
    # chained-assignment ambiguity).
    df = df.dropna(subset=["eps_estimate", "eps_actual"]).copy()
    df["eps_surprise_pct"] = (
        (df["eps_actual"] - df["eps_estimate"]) / abs(df["eps_estimate"]) * 100
    ).round(2)
    df["surprise_category"] = pd.cut(
        df["eps_surprise_pct"],
        bins=[-float("inf"), -10, -2, 2, 10, float("inf")],
        labels=["big_miss", "miss", "inline", "beat", "big_beat"]
    )
    return df.sort_values("eps_surprise_pct", ascending=False)
Building the Live Tracker
def run_live_tracker(check_interval=300):
    """Poll today's earnings calendar and print each new report once.

    Runs forever; re-scrapes every ``check_interval`` seconds and prints a
    BEAT/MISS line for symbols not yet announced.

    Args:
        check_interval: Seconds between polls (default 300).
    """
    today = datetime.now().strftime("%Y-%m-%d")
    seen_symbols = set()
    print(f"Live Earnings Tracker - {today}")
    while True:
        # A transient network/proxy failure must not kill the long-running
        # loop — log it and retry on the next cycle.
        try:
            df = get_earnings_calendar(today)
        except Exception as exc:
            print(f"[WARN] scrape failed: {exc}")
            time.sleep(check_interval)
            continue
        if df.empty:
            time.sleep(check_interval)
            continue
        df = calculate_surprises(df)
        new_reports = df[~df["symbol"].isin(seen_symbols)]
        for _, row in new_reports.iterrows():
            # pandas stores missing values as NaN (never None), so the
            # original `is not None` test could not filter unreported rows;
            # pd.notna() is the correct missing-value check.
            if pd.notna(row["eps_actual"]):
                direction = "BEAT" if row["eps_surprise_pct"] > 0 else "MISS"
                print(f"[{direction}] {row['symbol']} | EPS: ${row['eps_actual']} vs ${row['eps_estimate']} | "
                      f"Surprise: {row['eps_surprise_pct']:+.1f}%")
                seen_symbols.add(row["symbol"])
        time.sleep(check_interval)
Proxy Strategy for Financial Sites
- ScraperAPI — Best for Yahoo Finance with JavaScript rendering
- ThorData — Residential proxies for Bloomberg and Reuters
- ScrapeOps — Track success rates across financial domains
Conclusion
A live earnings surprise tracker combines web scraping with financial analysis. Start with the calendar scraper, add surprise calculations, and build alerting on top. Pair it with price data to measure post-earnings drift patterns.
Top comments (0)