Financial Data Scraping in 2026
Financial data drives trading algorithms, market research, portfolio analytics, and economic forecasting. While premium data feeds exist (Bloomberg Terminal, Refinitiv), many developers need free or low-cost alternatives.
This guide covers practical approaches to collecting stock prices, cryptocurrency data, and economic indicators using Python.
Stock Market Data
Free API Sources
Before scraping, check if an API exists. Several provide free stock data:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Yahoo Finance (via yfinance)
import yfinance as yf


def get_stock_data(ticker, period='1y'):
    """Return historical OHLCV price data for *ticker* over *period*.

    period uses yfinance period strings ('1d', '5d', '1mo', '1y', ...).
    """
    return yf.Ticker(ticker).history(period=period)


# Example
aapl = get_stock_data('AAPL')
print(f'AAPL last price: ${aapl["Close"].iloc[-1]:.2f}')
print(f'52-week high: ${aapl["High"].max():.2f}')
print(f'52-week low: ${aapl["Low"].min():.2f}')
Scraping Real-Time Quotes
When APIs don't have what you need, scraping fills the gap:
from bs4 import BeautifulSoup
import requests
import json


def scrape_stock_quote(ticker):
    """Scrape a real-time quote for *ticker* from the Yahoo Finance page.

    Parses the JSON blob ('QuoteSummaryStore') that Yahoo embeds in a
    <script> tag. Returns a dict with symbol/price/change/change_pct/
    volume/market_cap, or None if the embedded data cannot be found.
    """
    url = f'https://finance.yahoo.com/quote/{ticker}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    # Timeout keeps a stalled connection from hanging the scraper forever.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Look for the JSON data embedded in the page's script tags.
    for script in soup.find_all('script'):
        text = script.string
        if not text or 'QuoteSummaryStore' not in text:
            continue
        start = text.find('{"QuoteSummaryStore"')
        if start == -1:
            continue
        # The JSON object runs up to the next ';'. If no terminator is
        # found, fall back to the end of the script text — the original
        # slice [start:-1] would have silently dropped the last character.
        end = text.find(';', start)
        if end == -1:
            end = len(text)
        data = json.loads(text[start:end])
        price_data = data.get('QuoteSummaryStore', {}).get('price', {})
        return {
            'symbol': ticker,
            'price': price_data.get('regularMarketPrice', {}).get('raw'),
            'change': price_data.get('regularMarketChange', {}).get('raw'),
            'change_pct': price_data.get('regularMarketChangePercent', {}).get('raw'),
            'volume': price_data.get('regularMarketVolume', {}).get('raw'),
            'market_cap': price_data.get('marketCap', {}).get('raw'),
        }
    return None


quote = scrape_stock_quote('AAPL')
print(json.dumps(quote, indent=2))
Earnings & Financial Statements
def get_financials(ticker):
    """Pull the core financial statements for *ticker* via yfinance.

    Returns a dict of DataFrames: income statement, balance sheet,
    cash-flow statement, and upcoming/past earnings dates.
    """
    stock = yf.Ticker(ticker)
    statements = {
        'income_statement': stock.financials,
        'balance_sheet': stock.balance_sheet,
        'cash_flow': stock.cashflow,
        'earnings': stock.earnings_dates,
    }
    return statements
def get_key_metrics(ticker):
    """Return common valuation metrics for *ticker* from yfinance's info dict.

    Missing metrics come back as None (info.get default).
    """
    info = yf.Ticker(ticker).info
    # Map our output names to the yfinance info keys they come from.
    metric_keys = {
        'pe_ratio': 'trailingPE',
        'forward_pe': 'forwardPE',
        'peg_ratio': 'pegRatio',
        'price_to_book': 'priceToBook',
        'dividend_yield': 'dividendYield',
        'revenue': 'totalRevenue',
        'profit_margin': 'profitMargins',
    }
    return {name: info.get(key) for name, key in metric_keys.items()}
Cryptocurrency Data
Crypto data is more accessible than traditional finance thanks to open APIs and blockchain transparency.
CoinGecko API (Free)
def get_crypto_prices(coins, vs_currency='usd'):
    """Fetch spot prices from the free CoinGecko API.

    coins: iterable of CoinGecko coin ids (e.g. 'bitcoin', 'ethereum').
    vs_currency: quote currency code (default 'usd').
    Returns the decoded JSON dict keyed by coin id, each with price,
    24h change, market cap, and 24h volume fields.
    """
    url = 'https://api.coingecko.com/api/v3/simple/price'
    params = {
        'ids': ','.join(coins),
        'vs_currencies': vs_currency,
        'include_24hr_change': 'true',
        'include_market_cap': 'true',
        'include_24hr_vol': 'true',
    }
    # Timeout avoids hanging on an unresponsive API; raise_for_status
    # surfaces rate-limit (429) and other HTTP errors explicitly instead
    # of letting callers hit KeyErrors on an error payload.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    return response.json()


# Top coins
prices = get_crypto_prices(['bitcoin', 'ethereum', 'solana'])
for coin, data in prices.items():
    print(f'{coin}: ${data["usd"]:,.2f} ({data["usd_24h_change"]:+.2f}%)')
On-Chain Data Scraping
def get_eth_gas_prices():
    """Return current Ethereum gas prices (in gwei) from Etherscan's oracle.

    Returns {'low', 'average', 'high'} as ints.
    Raises RuntimeError when the API reports an error — on failure
    Etherscan puts an error *string* in 'result', which would otherwise
    crash the int() conversions with a confusing TypeError.
    """
    url = 'https://api.etherscan.io/api'
    params = {
        'module': 'gastracker',
        'action': 'gasoracle',
        'apikey': 'YOUR_ETHERSCAN_KEY'  # Free tier available
    }
    response = requests.get(url, params=params, timeout=10)
    data = response.json()['result']
    if not isinstance(data, dict):
        # e.g. "Max rate limit reached" when the free tier is exhausted
        raise RuntimeError(f'Etherscan error: {data}')
    return {
        'low': int(data['SafeGasPrice']),
        'average': int(data['ProposeGasPrice']),
        'high': int(data['FastGasPrice']),
    }
def get_whale_transactions(min_value_usd=1000000, cursor=None):
    """Track large crypto transfers via the Whale Alert API.

    min_value_usd: minimum transfer size (USD) to report.
    cursor: pagination cursor from a previous response; None fetches the
        first page (requests omits None-valued params automatically, so
        the default matches the original behavior exactly).
    Returns the list of transaction dicts ([] when the key is absent).
    """
    url = 'https://api.whale-alert.io/v1/transactions'
    params = {
        'api_key': 'YOUR_KEY',
        'min_value': min_value_usd,
        'cursor': cursor,
    }
    # Timeout keeps a slow/unreachable API from blocking the caller.
    response = requests.get(url, params=params, timeout=10)
    return response.json().get('transactions', [])
Crypto Signal Scanning
For automated crypto signal detection across multiple exchanges and indicators, the Crypto Signals Scanner on Apify provides real-time technical analysis signals (RSI, MACD, moving average crossovers) without building your own scanner infrastructure.
Economic Indicators
FRED (Federal Reserve Economic Data)
# pip install fredapi
from fredapi import Fred

fred = Fred(api_key='YOUR_FRED_KEY')  # Free at fred.stlouisfed.org


def get_economic_indicators():
    """Fetch headline US economic indicators from FRED.

    Returns {name: {'latest', 'date', 'yoy_change'}}. Series that come
    back empty are skipped rather than crashing on iloc[-1].

    NOTE(review): 'yoy_change' compares the latest value to the one 12
    observations earlier, which is a true year-over-year figure only for
    monthly series — GDP is quarterly, so its figure spans 3 years.
    Confirm each series' frequency before relying on this field.
    """
    indicators = {
        'GDP': 'GDP',
        'Unemployment': 'UNRATE',
        'CPI': 'CPIAUCSL',
        'Fed_Funds_Rate': 'FEDFUNDS',
        'M2_Money_Supply': 'M2SL',
        'Housing_Starts': 'HOUST',
    }
    data = {}
    for name, series_id in indicators.items():
        # FRED pads series with NaN placeholders; drop them so 'latest'
        # is a real observation, not NaN.
        series = fred.get_series(series_id).dropna()
        if series.empty:
            continue  # nothing to report for this series
        data[name] = {
            'latest': float(series.iloc[-1]),
            'date': str(series.index[-1].date()),
            'yoy_change': float((series.iloc[-1] / series.iloc[-13] - 1) * 100)
            if len(series) > 13 else None
        }
    return data


indicators = get_economic_indicators()
for name, vals in indicators.items():
    print(f'{name}: {vals["latest"]:.2f} (as of {vals["date"]})')
Scraping Economic Calendars
def scrape_economic_calendar():
    """Scrape today's economic-calendar events from investing.com.

    Returns a list of dicts with time/currency/event/actual/forecast/
    previous fields; values are None when the cell is missing from a row.
    """
    url = 'https://www.investing.com/economic-calendar/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml',
    }
    # Timeout keeps a stalled request from hanging the scraper.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    def cell_text(row, selector):
        # One place for the select -> strip-text -> None-if-missing dance
        # that the original repeated for every field.
        el = row.select_one(selector)
        return el.get_text(strip=True) if el else None

    events = []
    for row in soup.select('#economicCalendarData tr.js-event-item'):
        # (The original also selected '.sentiment' into an unused local;
        # dropped here since it never made it into the output.)
        events.append({
            'time': cell_text(row, '.time'),
            'currency': cell_text(row, '.flagCur'),
            'event': cell_text(row, '.event a'),
            'actual': cell_text(row, '.actual'),
            'forecast': cell_text(row, '.forecast'),
            'previous': cell_text(row, '.previous'),
        })
    return events
Building a Financial Data Pipeline
import sqlite3
from datetime import datetime
import schedule
class FinancialDataPipeline:
    """Collects stock and crypto quotes into a local SQLite database.

    Usable as a context manager so the connection is always released;
    the original never closed it (resource leak on long-running jobs).
    """

    def __init__(self, db_path='financial_data.db'):
        self.conn = sqlite3.connect(db_path)
        self.setup_tables()

    def setup_tables(self):
        """Create the price tables if they do not already exist."""
        self.conn.executescript('''
        CREATE TABLE IF NOT EXISTS stock_prices (
        ticker TEXT, price REAL, volume INTEGER,
        timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        );
        CREATE TABLE IF NOT EXISTS crypto_prices (
        coin TEXT, price_usd REAL, change_24h REAL,
        market_cap REAL, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        );
        ''')

    def collect_stocks(self, tickers):
        """Scrape and store one quote row per ticker; failed scrapes are skipped."""
        for ticker in tickers:
            data = scrape_stock_quote(ticker)
            if data:
                # Parameterized insert — never interpolate scraped values.
                self.conn.execute(
                    'INSERT INTO stock_prices (ticker, price, volume) VALUES (?, ?, ?)',
                    (ticker, data['price'], data['volume'])
                )
        self.conn.commit()

    def collect_crypto(self, coins):
        """Fetch CoinGecko prices and store one row per coin."""
        prices = get_crypto_prices(coins)
        for coin, data in prices.items():
            self.conn.execute(
                'INSERT INTO crypto_prices (coin, price_usd, change_24h, market_cap) VALUES (?, ?, ?, ?)',
                (coin, data['usd'], data.get('usd_24h_change'), data.get('usd_market_cap'))
            )
        self.conn.commit()

    def close(self):
        """Release the SQLite connection."""
        self.conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
# Run pipeline: writes to financial_data.db in the working directory.
# NOTE: both collect calls hit the network (Yahoo Finance scrape and the
# CoinGecko API), so this will fail offline and may be rate-limited.
pipeline = FinancialDataPipeline()
pipeline.collect_stocks(['AAPL', 'GOOGL', 'MSFT', 'TSLA'])
pipeline.collect_crypto(['bitcoin', 'ethereum', 'solana'])
Handling Anti-Bot Protection
Financial sites are heavily protected. For reliable scraping, use a proxy service like ScrapeOps that handles JavaScript rendering and IP rotation. This is especially important for sites like Yahoo Finance and Investing.com that use aggressive bot detection.
Conclusion
Financial data scraping combines API usage (yfinance, CoinGecko, FRED) with traditional web scraping for data points that APIs don't cover. For crypto-specific signals, the Crypto Signals Scanner provides pre-built technical analysis. For reliable access to protected financial sites, pair your scrapers with ScrapeOps proxy infrastructure.
Always verify financial data from multiple sources before making investment decisions — scraping errors in financial data can be expensive.
Top comments (0)