Every API has rate limits. Some document them clearly. Most don't. Whether you're building an integration, stress-testing your own service, or researching API behavior, knowing the actual rate limits saves you from mysterious 429 errors and silent throttling.
This tutorial shows you how to systematically discover and document API rate limits using Python.
Why Rate Limits Matter
Undocumented rate limits cause real problems:
- Your production integration breaks at 2 AM because you hit an unknown threshold
- Your scraper gets silently throttled, returning stale cached data instead of errors
- Your competitor's API wrapper works better because they reverse-engineered the limits
The Rate Limit Discovery Framework
pip install requests
import requests
import time
import json
from datetime import datetime
from dataclasses import dataclass, field
@dataclass
class RateLimitResult:
    """Outcome of probing a single endpoint for rate limits."""
    endpoint: str  # Path that was probed, relative to the prober's base URL.
    requests_sent: int  # Total requests issued during the probe.
    first_rejection_at: int | None  # 1-based index of the first rejected request, or None if never rejected.
    rejection_code: int | None  # HTTP status of the rejecting response (e.g. 429), or None.
    headers_found: dict = field(default_factory=dict)  # Most recent rate-limit headers observed.
    observed_window: float = 0.0  # Wall-clock seconds the probe ran.
    effective_rate: float = 0.0  # requests_sent / observed_window, in requests per second.
class RateLimitProber:
def __init__(self, base_url: str, auth_headers: dict = None):
self.base_url = base_url
self.session = requests.Session()
if auth_headers:
self.session.headers.update(auth_headers)
def probe_endpoint(self, path: str, method: str = "GET",
max_requests: int = 200,
delay_ms: int = 50) -> RateLimitResult:
url = f"{self.base_url}{path}"
result = RateLimitResult(endpoint=path, requests_sent=0,
first_rejection_at=None, rejection_code=None)
start_time = time.monotonic()
for i in range(max_requests):
try:
if method == "GET":
resp = self.session.get(url, timeout=10)
else:
resp = self.session.post(url, json={}, timeout=10)
result.requests_sent = i + 1
rl_headers = self._extract_rate_headers(resp.headers)
if rl_headers:
result.headers_found = rl_headers
if resp.status_code == 429:
result.first_rejection_at = i + 1
result.rejection_code = 429
break
if resp.status_code in (403, 503) and i > 10:
if self._is_rate_limit(resp):
result.first_rejection_at = i + 1
result.rejection_code = resp.status_code
break
time.sleep(delay_ms / 1000)
except requests.exceptions.Timeout:
pass
elapsed = time.monotonic() - start_time
result.observed_window = elapsed
result.effective_rate = result.requests_sent / elapsed if elapsed > 0 else 0
return result
def _extract_rate_headers(self, headers) -> dict:
rate_headers = {}
patterns = [
"X-RateLimit-Limit", "X-RateLimit-Remaining",
"X-RateLimit-Reset", "Retry-After",
"RateLimit-Limit", "RateLimit-Remaining", "RateLimit-Reset"
]
for p in patterns:
val = headers.get(p)
if val:
rate_headers[p] = val
return rate_headers
def _is_rate_limit(self, resp) -> bool:
body = resp.text.lower()
indicators = ["rate limit", "too many requests", "throttl", "slow down", "quota exceeded"]
return any(ind in body for ind in indicators)
Detecting Silent Throttling
Some APIs don't return errors. They just slow down responses:
import statistics
def detect_throttling(base_url: str, path: str, sample_size: int = 100) -> dict:
    """Detect silent (latency-based) throttling on ``base_url + path``.

    Samples ``sample_size`` GET requests and compares mean latency of the
    first half against the second half; a large slowdown suggests the API
    is degrading responses rather than returning errors.

    Returns a dict with mean/p95 latency (ms), per-half means, the
    slowdown factor, and a ``likely_throttled`` flag (second half more
    than 1.5x slower than the first).

    Raises ValueError if ``sample_size`` < 2 (both halves need data).
    """
    if sample_size < 2:
        raise ValueError("sample_size must be >= 2")
    url = f"{base_url}{path}"
    latencies = []
    # Context manager ensures the connection pool is closed (the original
    # leaked the Session).
    with requests.Session() as session:
        for _ in range(sample_size):
            start = time.monotonic()
            session.get(url, timeout=30)
            latencies.append((time.monotonic() - start) * 1000)
            time.sleep(0.05)
    mid = len(latencies) // 2
    first_half = latencies[:mid]
    second_half = latencies[mid:]
    # Compute each mean exactly once (the original recomputed them for
    # every dict entry).
    first_mean = statistics.mean(first_half)
    second_mean = statistics.mean(second_half)
    # Clamp the p95 index so it is always in range.
    p95_idx = min(int(len(latencies) * 0.95), len(latencies) - 1)
    return {
        "mean_ms": statistics.mean(latencies),
        "p95_ms": sorted(latencies)[p95_idx],
        "first_half_mean": first_mean,
        "second_half_mean": second_mean,
        "slowdown_factor": second_mean / first_mean,
        "likely_throttled": second_mean > first_mean * 1.5,
    }
Window Size Detection
def find_rate_window(prober: "RateLimitProber", path: str) -> dict:
    """Estimate the rate-limit window length for ``path``.

    First probes until rejection to find the request limit, then waits
    until 60 / 300 / 900 / 3600 seconds have elapsed since the rejection,
    testing after each whether a new request succeeds.

    Returns a dict with the discovered limit and window, or a note when
    no limit was found within 500 requests.
    """
    result = prober.probe_endpoint(path, max_requests=500, delay_ms=10)
    if not result.first_rejection_at:
        return {"window": "unknown", "note": "No limit found in 500 requests"}
    limit = result.first_rejection_at
    # Candidate windows, expressed as TOTAL seconds since the rejection.
    windows = [60, 300, 900, 3600]
    waited = 0
    for window_sec in windows:
        # Sleep only the remaining delta so each check happens at exactly
        # window_sec seconds after rejection. The original slept the full
        # candidate each pass, so the "300s" check really ran at 360s
        # (60 + 300), mislabeling every window after the first.
        print(f"Waiting until {window_sec}s after rejection to test window reset...")
        time.sleep(window_sec - waited)
        waited = window_sec
        resp = prober.session.get(f"{prober.base_url}{path}", timeout=10)
        if resp.status_code != 429:
            return {"limit": limit, "window_seconds": window_sec,
                    "rate": f"{limit} requests per {window_sec}s"}
    return {"limit": limit, "window": ">1 hour"}
Running the Analysis
def analyze_api(base_url: str, endpoints: list[str], auth: dict = None):
    """Probe every endpoint in ``endpoints`` against ``base_url`` and print
    a rate-limit report for each (requests before rejection, rejection
    code, rate-limit headers, effective request rate)."""
    prober = RateLimitProber(base_url, auth)
    print(f"Rate Limit Analysis: {base_url}")
    for ep in endpoints:
        print(f"\nProbing {ep}...")
        probe = prober.probe_endpoint(ep)
        rejection = probe.first_rejection_at
        if rejection is None:
            rejection = "None detected"
        print(f" Requests before rejection: {rejection}")
        print(f" Rejection code: {probe.rejection_code}")
        print(f" Rate headers: {json.dumps(probe.headers_found, indent=2)}")
        print(f" Effective rate: {probe.effective_rate:.1f} req/s")
        # Breathe between endpoints so one probe's limit doesn't bleed
        # into the next measurement.
        time.sleep(5)
if __name__ == "__main__":
    # Example invocation — replace the base URL, endpoint list, and token
    # with real values. Guarded so importing this module does not fire
    # live HTTP probes at the placeholder host.
    analyze_api(
        "https://api.example.com",
        ["/v1/users", "/v1/search", "/v1/data"],
        auth={"Authorization": "Bearer YOUR_TOKEN"},
    )
Proxy Rotation for Scale
When probing APIs at scale, you need to test rate limits per-IP vs per-account. ScraperAPI lets you rotate IPs automatically to test whether limits are IP-based or token-based.
For residential IP testing, ThorData provides geo-distributed proxies. Monitor your probing runs with ScrapeOps dashboards.
Ethical Considerations
- Only probe APIs you have legitimate access to
- Use reasonable delays between requests
- The goal is discovery, not denial of service
- Document findings and share responsibly
- Respect robots.txt and ToS
Rate limit discovery is a legitimate engineering practice that helps you build more resilient integrations. The best API consumers are the ones who understand and respect the limits.
Top comments (0)