Scraping Historical Stock Options and Derivatives Data with Python
Options and derivatives data is essential for quantitative analysis, backtesting strategies, and risk modeling. While premium data feeds cost thousands monthly, much of this data is publicly accessible on financial websites.
What Data Are We After?
- Strike prices and expiration dates
- Bid/ask spreads and volume
- Open interest across chains
- Historical implied volatility
Setup
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from datetime import datetime, timedelta
# Base endpoint of the proxy/rendering service that all requests are routed through.
PROXY_URL = "https://api.scraperapi.com"
# Per-account key for the proxy service — placeholder; replace before running.
API_KEY = "YOUR_SCRAPERAPI_KEY"
Financial sites are aggressive with anti-bot measures. ScraperAPI handles fingerprinting and rate limiting automatically.
Scraping Options Chains
def get_options_chain(ticker, expiration=None):
    """Fetch the Yahoo Finance options chain for *ticker* through the proxy.

    Parameters
    ----------
    ticker : str
        Stock symbol, e.g. "AAPL".
    expiration : int | str | None
        Optional expiration selector appended as a ``?date=`` query
        parameter (Yahoo expects a Unix timestamp here — confirm).

    Returns
    -------
    dict
        ``{"calls": [...], "puts": [...], "ticker": ticker}`` where each
        list holds header->cell dicts from ``parse_options_table``.

    Raises
    ------
    requests.HTTPError
        If the proxy responds with a non-2xx status.
    """
    url = f"https://finance.yahoo.com/quote/{ticker}/options/"
    if expiration:
        url += f"?date={expiration}"
    params = {
        "api_key": API_KEY,
        "url": url,
        # The chain table is rendered client-side, so JS rendering is required.
        "render": "true",
    }
    # Timeout keeps an unresponsive proxy from hanging the pipeline;
    # raise_for_status surfaces blocked/failed fetches instead of silently
    # parsing an error page into empty chains.
    response = requests.get(PROXY_URL, params=params, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    calls = parse_options_table(soup, "calls")
    puts = parse_options_table(soup, "puts")
    return {"calls": calls, "puts": puts, "ticker": ticker}
def parse_options_table(soup, option_type):
    """Extract the calls/puts table as a list of header->cell dicts.

    ``option_type`` selects the table by CSS class ("calls" or "puts").
    Returns an empty list when the table is absent from the page.
    """
    table = soup.select_one(f"table.{option_type}")
    if not table:
        return []
    column_names = [th.text.strip() for th in table.select("thead th")]
    rows = []
    for tr in table.select("tbody tr"):
        values = [td.text.strip() for td in tr.select("td")]
        # Skip rows whose cell count doesn't line up with the header
        # (spacer/ad rows would otherwise misalign the zip).
        if len(values) != len(column_names):
            continue
        rows.append(dict(zip(column_names, values)))
    return rows
Historical Volatility Calculator
def _period_to_days(period):
    """Translate a lookback string like "30d", "6mo", or "1y" into days.

    Raises ValueError for an unrecognized suffix.
    """
    # "mo" must be checked before single-letter suffixes would ever match it.
    for suffix, days_per_unit in (("mo", 30), ("d", 1), ("y", 365)):
        if period.endswith(suffix):
            return int(period[: -len(suffix)]) * days_per_unit
    raise ValueError(f"Unsupported period: {period!r}")


def scrape_historical_prices(ticker, period="1y"):
    """Scrape daily date/close/volume rows from Yahoo Finance history.

    Bug fix: ``period`` was previously accepted but ignored — the window
    was hard-coded to 365 days. It is now honored; the default "1y"
    preserves the old behavior exactly.

    Returns a list of ``{"date", "close", "volume"}`` dicts of raw cell
    strings (newest rows first, as rendered by Yahoo — confirm ordering
    before time-series math).
    """
    end = int(datetime.now().timestamp())
    start = int((datetime.now() - timedelta(days=_period_to_days(period))).timestamp())
    params = {
        "api_key": API_KEY,
        "url": f"https://finance.yahoo.com/quote/{ticker}/history/?period1={start}&period2={end}",
    }
    # Timeout keeps a stalled proxy request from hanging the pipeline.
    response = requests.get(PROXY_URL, params=params, timeout=60)
    soup = BeautifulSoup(response.text, "html.parser")
    rows = []
    table = soup.select_one("table[data-testid='history-table']")
    if table:
        for row in table.select("tbody tr"):
            cells = [td.text.strip() for td in row.select("td")]
            # Short rows are likely dividend/split marker rows — skip them.
            if len(cells) >= 6:
                rows.append({"date": cells[0], "close": cells[4], "volume": cells[5]})
    return rows
def calculate_historical_vol(prices, window=30):
    """Compute rolling annualized close-to-close volatility.

    Parameters
    ----------
    prices : list[dict]
        Rows as produced by ``scrape_historical_prices`` — each dict has a
        "close" cell string, possibly with thousands separators ("1,234.56").
    window : int
        Rolling window length in observations (default 30).

    Returns
    -------
    pandas.DataFrame
        The input columns plus "returns" (simple pct change) and "vol"
        (rolling std of returns annualized by sqrt(252) trading days).
        Empty input yields an empty frame instead of raising KeyError.
    """
    df = pd.DataFrame(prices)
    if df.empty:
        # An empty scrape previously raised KeyError on df["close"].
        return pd.DataFrame(columns=["date", "close", "volume", "returns", "vol"])
    # Strip thousands separators before numeric conversion; unparseable
    # cells become NaN rather than crashing.
    df["close"] = pd.to_numeric(df["close"].str.replace(",", ""), errors="coerce")
    df["returns"] = df["close"].pct_change()
    df["vol"] = df["returns"].rolling(window).std() * (252 ** 0.5)
    return df
Unusual Activity Scanner
def _to_int(text):
    """Parse a table cell like "1,234" into an int; "-", "", "N/A" -> 0."""
    cleaned = (text or "0").replace(",", "").strip()
    try:
        return int(cleaned)
    except ValueError:
        # Yahoo renders missing volume/open-interest as "-" or "N/A";
        # the old int(...) call crashed the whole scan on those cells.
        return 0


def scan_unusual_activity(tickers):
    """Flag options whose daily volume exceeds twice their open interest.

    Parameters
    ----------
    tickers : iterable[str]
        Symbols to scan; one chain fetch per ticker, throttled by 5s.

    Returns
    -------
    list[dict]
        Hits with ticker/type/strike/volume/open_interest/ratio, sorted
        by volume-to-OI ratio, highest first.
    """
    unusual = []
    for ticker in tickers:
        chain = get_options_chain(ticker)
        for opt_type in ["calls", "puts"]:
            for opt in chain[opt_type]:
                volume = _to_int(opt.get("Volume", "0"))
                oi = _to_int(opt.get("Open Interest", "0"))
                # Guard oi > 0 both for the division and to skip new listings.
                if oi > 0 and volume > oi * 2:
                    unusual.append({
                        "ticker": ticker, "type": opt_type,
                        "strike": opt.get("Strike", ""),
                        "volume": volume, "open_interest": oi,
                        "ratio": round(volume / oi, 2),
                    })
        # Throttle between tickers to stay under rate limits.
        time.sleep(5)
    return sorted(unusual, key=lambda x: x["ratio"], reverse=True)
Infrastructure Recommendations
- ScraperAPI — handles Yahoo Finance's anti-bot protection
- ThorData — residential proxies for financial data sources
- ScrapeOps — monitor success rates across financial pipelines
Conclusion
Building your own options data pipeline eliminates expensive data vendor costs. Start with daily snapshots, build historical depth, and power any quantitative strategy with your own data asset.
Top comments (0)