Accessing financial data programmatically is essential for any quant trader. Here's how to do it responsibly and efficiently.
Data Sources
Free APIs
import requests
# Yahoo Finance (unofficial)
def get_stock_data(symbol, period='1mo'):
    """Fetch daily chart data for *symbol* from Yahoo Finance's chart endpoint.

    Args:
        symbol: Ticker symbol interpolated into the request path.
        period: Value sent as the ``range`` query parameter (e.g. ``'1mo'``).

    Returns:
        The decoded JSON body of the response.
    """
    url = f'https://query1.finance.yahoo.com/v8/finance/chart/{symbol}'
    params = {'range': period, 'interval': '1d'}
    r = requests.get(
        url,
        params=params,  # without this the requested range/interval is ignored
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=10,  # never hang indefinitely on a stalled connection
    )
    return r.json()
# CoinGecko (crypto)
def get_crypto_price(coin_id):
    """Fetch the USD price of *coin_id* from CoinGecko's simple-price endpoint.

    Args:
        coin_id: Value sent as the ``ids`` query parameter.

    Returns:
        The decoded JSON body of the response.
    """
    url = 'https://api.coingecko.com/api/v3/simple/price'
    params = {'ids': coin_id, 'vs_currencies': 'usd'}
    # The query parameters must actually be sent; otherwise the request
    # names no coin and no quote currency.
    return requests.get(url, params=params, timeout=10).json()
Economic Calendar
from datetime import datetime
def get_forex_factory_events(date=None):
    """Placeholder for an economic-calendar fetcher; be respectful of rate limits.

    Resolves *date* to today's date (``YYYY-MM-DD``) when it is omitted.
    No fetching is implemented yet, so the function always returns ``None``.
    """
    date = datetime.now().strftime('%Y-%m-%d') if date is None else date
    # Use the ForexFactory RSS or API
    # Always add delays between requests
    return None
Best Practices
1. Respect Rate Limits
import time
class RateLimiter:
    """Blocking limiter that spaces calls at least ``1 / calls_per_second`` apart.

    Call :meth:`wait` immediately before each rate-limited operation.
    """

    def __init__(self, calls_per_second=1):
        """Configure the minimum spacing between calls.

        Args:
            calls_per_second: Maximum call rate; must be greater than zero.

        Raises:
            ValueError: If *calls_per_second* is zero or negative.
        """
        if calls_per_second <= 0:
            # A negative rate would yield a negative delay and silently
            # disable limiting; zero would divide by zero.
            raise ValueError('calls_per_second must be positive')
        self.delay = 1.0 / calls_per_second
        # None means "no call made yet", so the first wait() never sleeps.
        self.last_call = None

    def wait(self):
        """Sleep just long enough to honour the configured rate, then record the call."""
        if self.last_call is not None:
            # The monotonic clock is immune to wall-clock adjustments
            # (NTP, DST, manual changes) that would distort the interval.
            remaining = self.delay - (time.monotonic() - self.last_call)
            if remaining > 0:
                time.sleep(remaining)
        self.last_call = time.monotonic()
# Module-level limiter instance configured for 2 calls per second.
limiter = RateLimiter(calls_per_second=2)
2. Cache Results
import json
import hashlib
from pathlib import Path
# Directory holding cached responses; a relative path, so it resolves
# against the current working directory.
CACHE_DIR = Path('cache')
# Created at import time; exist_ok=True makes repeated imports/runs harmless.
CACHE_DIR.mkdir(exist_ok=True)
def cached_request(url, ttl_hours=1):
    """GET *url* and return its JSON body, served from a file cache when fresh.

    Each URL maps to one JSON file in ``CACHE_DIR`` holding the fetch
    timestamp and the decoded content.

    Args:
        url: Full request URL; also used as the cache key.
        ttl_hours: Maximum age, in hours, of a cache entry that may be reused.

    Returns:
        The decoded JSON content, from cache or from a fresh request.

    Raises:
        requests.HTTPError: If a fresh request returns an error status.
    """
    # MD5 is used only to derive a filesystem-safe name, not for security.
    cache_key = hashlib.md5(url.encode()).hexdigest()
    cache_file = CACHE_DIR / f'{cache_key}.json'

    try:
        data = json.loads(cache_file.read_text())
        age_hours = (time.time() - data['timestamp']) / 3600
        if age_hours < ttl_hours:
            return data['content']
    except (OSError, ValueError, KeyError, TypeError):
        # Missing, unreadable, truncated, or malformed entry: treat it as
        # a cache miss and refetch instead of failing every later call.
        pass

    response = requests.get(url, timeout=10)
    # Never cache an error page as if it were valid data.
    response.raise_for_status()
    cache_data = {
        'timestamp': time.time(),
        'content': response.json(),
    }
    # Write to a sibling file and rename so a crash mid-write cannot leave
    # a half-written cache entry behind.
    tmp_file = cache_file.with_suffix('.tmp')
    tmp_file.write_text(json.dumps(cache_data))
    tmp_file.replace(cache_file)
    return cache_data['content']
3. Handle Failures Gracefully
from tenacity import retry, stop_after_attempt, wait_exponential
@retry(wait=wait_exponential(min=1, max=10), stop=stop_after_attempt(3))
def fetch_with_retry(url):
    """GET *url* and return its decoded JSON body.

    The request uses a 10-second timeout and raises on HTTP error statuses.
    The ``retry`` decorator is configured with a three-attempt stop
    condition and an exponential wait bounded by ``min=1`` and ``max=10``.
    """
    reply = requests.get(url, timeout=10)
    reply.raise_for_status()
    payload = reply.json()
    return payload
4. Store Data Properly
import sqlite3
import pandas as pd
def store_ohlcv(df, symbol, db_path='market_data.db'):
    """Append the rows of *df* to the ``ohlcv`` table, tagged with *symbol*.

    Args:
        df: DataFrame of rows to store; it is not modified.
        symbol: Value written to the ``symbol`` column of every stored row.
        db_path: Path of the SQLite database file.
    """
    # assign() returns a new frame, so the caller's DataFrame does not
    # unexpectedly gain a 'symbol' column.
    tagged = df.assign(symbol=symbol)
    conn = sqlite3.connect(db_path)
    try:
        tagged.to_sql('ohlcv', conn, if_exists='append', index=False)
    finally:
        # Release the connection even when the write fails.
        conn.close()
Building a Data Pipeline
class MarketDataPipeline:
    """Fetch bars for a list of symbols and append them to a SQLite database.

    The pipeline owns a SQLite connection (``self.db``). Release it with
    :meth:`close`, or use the pipeline as a context manager.
    """

    def __init__(self, symbols, db_path='market_data.db'):
        """Set up the pipeline.

        Args:
            symbols: Iterable of ticker symbols to process.
            db_path: Path of the SQLite database file used for storage.
        """
        self.symbols = symbols
        self.limiter = RateLimiter(2)
        self.db_path = db_path
        self.db = sqlite3.connect(db_path)

    def close(self):
        """Close the pipeline's database connection."""
        self.db.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    def run_daily(self):
        """Fetch and store every symbol, pacing iterations with the limiter.

        A failure on one symbol is reported and does not stop the run.
        """
        for symbol in self.symbols:
            self.limiter.wait()
            try:
                data = self.fetch(symbol)
                self.store(symbol, data)
            except Exception as e:
                # Deliberate best-effort boundary: one bad symbol must not
                # abort the rest of the batch.
                print(f'Error {symbol}: {e}')

    def fetch(self, symbol):
        """Return bar data for *symbol*, reusing cached results up to 4 hours old."""
        return cached_request(
            f'https://api.example.com/v1/bars/{symbol}',
            ttl_hours=4
        )

    def store(self, symbol, data):
        """Convert *data* to a DataFrame and append it to the pipeline's database."""
        df = pd.DataFrame(data)
        store_ohlcv(df, symbol, db_path=self.db_path)
Legal Considerations
- Always check the site's robots.txt and ToS
- Use official APIs when available
- Don't overload servers
- Cache aggressively to reduce requests
- Consider paid data providers for production use
Financial data is the foundation of any trading analysis. Whether you're building indicators, backtesting strategies, or comparing firms on platforms like propfirmkey.com, clean and reliable data makes all the difference.
What data sources do you use for your trading analysis?
Top comments (0)