Scraping Historical Stock Options and Derivatives Data with Python
Options and derivatives data is essential for quantitative analysis, backtesting strategies, and risk modeling. While premium data feeds cost thousands monthly, much of this data is publicly accessible on financial websites.
What Data Are We After?
- Strike prices and expiration dates
- Bid/ask spreads and volume
- Open interest across chains
- Historical implied volatility
Setup
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from datetime import datetime, timedelta
# Base endpoint of the proxy/rendering service that all requests are routed through.
PROXY_URL = "https://api.scraperapi.com"
# Per-account key for the proxy service — placeholder; replace before running.
API_KEY = "YOUR_SCRAPERAPI_KEY"
Financial sites are aggressive with anti-bot measures. ScraperAPI handles fingerprinting and rate limiting automatically.
Scraping Options Chains
def get_options_chain(ticker, expiration=None):
    """Fetch the Yahoo Finance options chain for *ticker* through the proxy.

    Parameters
    ----------
    ticker : str
        Stock symbol, e.g. "AAPL".
    expiration : int | str | None
        Optional expiration selector appended as a ``?date=`` query
        parameter (Yahoo expects a Unix timestamp here — confirm).

    Returns
    -------
    dict
        ``{"calls": [...], "puts": [...], "ticker": ticker}`` where each
        list holds header->cell dicts from ``parse_options_table``.

    Raises
    ------
    requests.HTTPError
        If the proxy responds with a non-2xx status.
    """
    url = f"https://finance.yahoo.com/quote/{ticker}/options/"
    if expiration:
        url += f"?date={expiration}"
    params = {
        "api_key": API_KEY,
        "url": url,
        # The chain table is rendered client-side, so JS rendering is required.
        "render": "true",
    }
    # Timeout keeps an unresponsive proxy from hanging the pipeline;
    # raise_for_status surfaces blocked/failed fetches instead of silently
    # parsing an error page into empty chains.
    response = requests.get(PROXY_URL, params=params, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    calls = parse_options_table(soup, "calls")
    puts = parse_options_table(soup, "puts")
    return {"calls": calls, "puts": puts, "ticker": ticker}
def parse_options_table(soup, option_type):
    """Extract the calls/puts table as a list of header->cell dicts.

    ``option_type`` selects the table by CSS class ("calls" or "puts").
    Returns an empty list when the table is absent from the page.
    """
    table = soup.select_one(f"table.{option_type}")
    if not table:
        return []
    column_names = [th.text.strip() for th in table.select("thead th")]
    rows = []
    for tr in table.select("tbody tr"):
        values = [td.text.strip() for td in tr.select("td")]
        # Skip rows whose cell count doesn't line up with the header
        # (spacer/ad rows would otherwise misalign the zip).
        if len(values) != len(column_names):
            continue
        rows.append(dict(zip(column_names, values)))
    return rows
Historical Volatility Calculator
def _period_to_days(period):
    """Translate a lookback string like "30d", "6mo", or "1y" into days.

    Raises ValueError for an unrecognized suffix.
    """
    # "mo" must be checked before single-letter suffixes would ever match it.
    for suffix, days_per_unit in (("mo", 30), ("d", 1), ("y", 365)):
        if period.endswith(suffix):
            return int(period[: -len(suffix)]) * days_per_unit
    raise ValueError(f"Unsupported period: {period!r}")


def scrape_historical_prices(ticker, period="1y"):
    """Scrape daily date/close/volume rows from Yahoo Finance history.

    Bug fix: ``period`` was previously accepted but ignored — the window
    was hard-coded to 365 days. It is now honored; the default "1y"
    preserves the old behavior exactly.

    Returns a list of ``{"date", "close", "volume"}`` dicts of raw cell
    strings (newest rows first, as rendered by Yahoo — confirm ordering
    before time-series math).
    """
    end = int(datetime.now().timestamp())
    start = int((datetime.now() - timedelta(days=_period_to_days(period))).timestamp())
    params = {
        "api_key": API_KEY,
        "url": f"https://finance.yahoo.com/quote/{ticker}/history/?period1={start}&period2={end}",
    }
    # Timeout keeps a stalled proxy request from hanging the pipeline.
    response = requests.get(PROXY_URL, params=params, timeout=60)
    soup = BeautifulSoup(response.text, "html.parser")
    rows = []
    table = soup.select_one("table[data-testid='history-table']")
    if table:
        for row in table.select("tbody tr"):
            cells = [td.text.strip() for td in row.select("td")]
            # Short rows are likely dividend/split marker rows — skip them.
            if len(cells) >= 6:
                rows.append({"date": cells[0], "close": cells[4], "volume": cells[5]})
    return rows
def calculate_historical_vol(prices, window=30):
    """Compute rolling annualized close-to-close volatility.

    Parameters
    ----------
    prices : list[dict]
        Rows as produced by ``scrape_historical_prices`` — each dict has a
        "close" cell string, possibly with thousands separators ("1,234.56").
    window : int
        Rolling window length in observations (default 30).

    Returns
    -------
    pandas.DataFrame
        The input columns plus "returns" (simple pct change) and "vol"
        (rolling std of returns annualized by sqrt(252) trading days).
        Empty input yields an empty frame instead of raising KeyError.
    """
    df = pd.DataFrame(prices)
    if df.empty:
        # An empty scrape previously raised KeyError on df["close"].
        return pd.DataFrame(columns=["date", "close", "volume", "returns", "vol"])
    # Strip thousands separators before numeric conversion; unparseable
    # cells become NaN rather than crashing.
    df["close"] = pd.to_numeric(df["close"].str.replace(",", ""), errors="coerce")
    df["returns"] = df["close"].pct_change()
    df["vol"] = df["returns"].rolling(window).std() * (252 ** 0.5)
    return df
Unusual Activity Scanner
def _to_int(text):
    """Parse a table cell like "1,234" into an int; "-", "", "N/A" -> 0."""
    cleaned = (text or "0").replace(",", "").strip()
    try:
        return int(cleaned)
    except ValueError:
        # Yahoo renders missing volume/open-interest as "-" or "N/A";
        # the old int(...) call crashed the whole scan on those cells.
        return 0


def scan_unusual_activity(tickers):
    """Flag options whose daily volume exceeds twice their open interest.

    Parameters
    ----------
    tickers : iterable[str]
        Symbols to scan; one chain fetch per ticker, throttled by 5s.

    Returns
    -------
    list[dict]
        Hits with ticker/type/strike/volume/open_interest/ratio, sorted
        by volume-to-OI ratio, highest first.
    """
    unusual = []
    for ticker in tickers:
        chain = get_options_chain(ticker)
        for opt_type in ["calls", "puts"]:
            for opt in chain[opt_type]:
                volume = _to_int(opt.get("Volume", "0"))
                oi = _to_int(opt.get("Open Interest", "0"))
                # Guard oi > 0 both for the division and to skip new listings.
                if oi > 0 and volume > oi * 2:
                    unusual.append({
                        "ticker": ticker, "type": opt_type,
                        "strike": opt.get("Strike", ""),
                        "volume": volume, "open_interest": oi,
                        "ratio": round(volume / oi, 2),
                    })
        # Throttle between tickers to stay under rate limits.
        time.sleep(5)
    return sorted(unusual, key=lambda x: x["ratio"], reverse=True)
Infrastructure Recommendations
- ScraperAPI — handles Yahoo Finance's anti-bot protection
- ThorData — residential proxies for financial data sources
- ScrapeOps — monitor success rates across financial pipelines
Conclusion
Building your own options data pipeline eliminates expensive data vendor costs. Start with daily snapshots, build historical depth, and power any quantitative strategy with your own data asset.
Top comments (0)