Every API has rate limits. Some document them clearly. Most don't. Whether you're building an integration, stress-testing your own service, or researching API behavior, knowing the actual rate limits saves you from mysterious 429 errors and silent throttling.
This tutorial shows you how to systematically discover and document API rate limits using Python.
Why Rate Limits Matter
Undocumented rate limits cause real problems:
- Your production integration breaks at 2 AM because you hit an unknown threshold
- Your scraper gets silently throttled, returning stale cached data instead of errors
- Your competitor's API wrapper works better because they reverse-engineered the limits
The Rate Limit Discovery Framework
pip install requests
import requests
import time
import json
from datetime import datetime
from dataclasses import dataclass, field
@dataclass
class RateLimitResult:
    """Outcome of probing a single endpoint for rate limits."""
    endpoint: str  # Path that was probed, relative to the prober's base URL.
    requests_sent: int  # Total requests issued during the probe.
    first_rejection_at: int | None  # 1-based index of the first rejected request, or None if never rejected.
    rejection_code: int | None  # HTTP status of the rejecting response (e.g. 429), or None.
    headers_found: dict = field(default_factory=dict)  # Most recent rate-limit headers observed.
    observed_window: float = 0.0  # Wall-clock seconds the probe ran.
    effective_rate: float = 0.0  # requests_sent / observed_window, in requests per second.
class RateLimitProber:
def __init__(self, base_url: str, auth_headers: dict = None):
self.base_url = base_url
self.session = requests.Session()
if auth_headers:
self.session.headers.update(auth_headers)
def probe_endpoint(self, path: str, method: str = "GET",
max_requests: int = 200,
delay_ms: int = 50) -> RateLimitResult:
url = f"{self.base_url}{path}"
result = RateLimitResult(endpoint=path, requests_sent=0,
first_rejection_at=None, rejection_code=None)
start_time = time.monotonic()
for i in range(max_requests):
try:
if method == "GET":
resp = self.session.get(url, timeout=10)
else:
resp = self.session.post(url, json={}, timeout=10)
result.requests_sent = i + 1
rl_headers = self._extract_rate_headers(resp.headers)
if rl_headers:
result.headers_found = rl_headers
if resp.status_code == 429:
result.first_rejection_at = i + 1
result.rejection_code = 429
break
if resp.status_code in (403, 503) and i > 10:
if self._is_rate_limit(resp):
result.first_rejection_at = i + 1
result.rejection_code = resp.status_code
break
time.sleep(delay_ms / 1000)
except requests.exceptions.Timeout:
pass
elapsed = time.monotonic() - start_time
result.observed_window = elapsed
result.effective_rate = result.requests_sent / elapsed if elapsed > 0 else 0
return result
def _extract_rate_headers(self, headers) -> dict:
rate_headers = {}
patterns = [
"X-RateLimit-Limit", "X-RateLimit-Remaining",
"X-RateLimit-Reset", "Retry-After",
"RateLimit-Limit", "RateLimit-Remaining", "RateLimit-Reset"
]
for p in patterns:
val = headers.get(p)
if val:
rate_headers[p] = val
return rate_headers
def _is_rate_limit(self, resp) -> bool:
body = resp.text.lower()
indicators = ["rate limit", "too many requests", "throttl", "slow down", "quota exceeded"]
return any(ind in body for ind in indicators)
Detecting Silent Throttling
Some APIs don't return errors. They just slow down responses:
import statistics
def detect_throttling(base_url: str, path: str, sample_size: int = 100) -> dict:
    """Detect silent (latency-based) throttling on ``base_url + path``.

    Samples ``sample_size`` GET requests and compares mean latency of the
    first half against the second half; a large slowdown suggests the API
    is degrading responses rather than returning errors.

    Returns a dict with mean/p95 latency (ms), per-half means, the
    slowdown factor, and a ``likely_throttled`` flag (second half more
    than 1.5x slower than the first).

    Raises ValueError if ``sample_size`` < 2 (both halves need data).
    """
    if sample_size < 2:
        raise ValueError("sample_size must be >= 2")
    url = f"{base_url}{path}"
    latencies = []
    # Context manager ensures the connection pool is closed (the original
    # leaked the Session).
    with requests.Session() as session:
        for _ in range(sample_size):
            start = time.monotonic()
            session.get(url, timeout=30)
            latencies.append((time.monotonic() - start) * 1000)
            time.sleep(0.05)
    mid = len(latencies) // 2
    first_half = latencies[:mid]
    second_half = latencies[mid:]
    # Compute each mean exactly once (the original recomputed them for
    # every dict entry).
    first_mean = statistics.mean(first_half)
    second_mean = statistics.mean(second_half)
    # Clamp the p95 index so it is always in range.
    p95_idx = min(int(len(latencies) * 0.95), len(latencies) - 1)
    return {
        "mean_ms": statistics.mean(latencies),
        "p95_ms": sorted(latencies)[p95_idx],
        "first_half_mean": first_mean,
        "second_half_mean": second_mean,
        "slowdown_factor": second_mean / first_mean,
        "likely_throttled": second_mean > first_mean * 1.5,
    }
Window Size Detection
def find_rate_window(prober: "RateLimitProber", path: str) -> dict:
    """Estimate the rate-limit window length for ``path``.

    First probes until rejection to find the request limit, then waits
    until 60 / 300 / 900 / 3600 seconds have elapsed since the rejection,
    testing after each whether a new request succeeds.

    Returns a dict with the discovered limit and window, or a note when
    no limit was found within 500 requests.
    """
    result = prober.probe_endpoint(path, max_requests=500, delay_ms=10)
    if not result.first_rejection_at:
        return {"window": "unknown", "note": "No limit found in 500 requests"}
    limit = result.first_rejection_at
    # Candidate windows, expressed as TOTAL seconds since the rejection.
    windows = [60, 300, 900, 3600]
    waited = 0
    for window_sec in windows:
        # Sleep only the remaining delta so each check happens at exactly
        # window_sec seconds after rejection. The original slept the full
        # candidate each pass, so the "300s" check really ran at 360s
        # (60 + 300), mislabeling every window after the first.
        print(f"Waiting until {window_sec}s after rejection to test window reset...")
        time.sleep(window_sec - waited)
        waited = window_sec
        resp = prober.session.get(f"{prober.base_url}{path}", timeout=10)
        if resp.status_code != 429:
            return {"limit": limit, "window_seconds": window_sec,
                    "rate": f"{limit} requests per {window_sec}s"}
    return {"limit": limit, "window": ">1 hour"}
Running the Analysis
def analyze_api(base_url: str, endpoints: list[str], auth: dict = None):
    """Probe every endpoint in ``endpoints`` against ``base_url`` and print
    a rate-limit report for each (requests before rejection, rejection
    code, rate-limit headers, effective request rate)."""
    prober = RateLimitProber(base_url, auth)
    print(f"Rate Limit Analysis: {base_url}")
    for ep in endpoints:
        print(f"\nProbing {ep}...")
        probe = prober.probe_endpoint(ep)
        rejection = probe.first_rejection_at
        if rejection is None:
            rejection = "None detected"
        print(f" Requests before rejection: {rejection}")
        print(f" Rejection code: {probe.rejection_code}")
        print(f" Rate headers: {json.dumps(probe.headers_found, indent=2)}")
        print(f" Effective rate: {probe.effective_rate:.1f} req/s")
        # Breathe between endpoints so one probe's limit doesn't bleed
        # into the next measurement.
        time.sleep(5)
if __name__ == "__main__":
    # Example invocation — replace the base URL, endpoint list, and token
    # with real values. Guarded so importing this module does not fire
    # live HTTP probes at the placeholder host.
    analyze_api(
        "https://api.example.com",
        ["/v1/users", "/v1/search", "/v1/data"],
        auth={"Authorization": "Bearer YOUR_TOKEN"},
    )
Proxy Rotation for Scale
When probing APIs at scale, you need to test rate limits per-IP vs per-account. ScraperAPI lets you rotate IPs automatically to test whether limits are IP-based or token-based.
For residential IP testing, ThorData provides geo-distributed proxies. Monitor your probing runs with ScrapeOps dashboards.
Ethical Considerations
- Only probe APIs you have legitimate access to
- Use reasonable delays between requests
- The goal is discovery, not denial of service
- Document findings and share responsibly
- Respect robots.txt and ToS
Rate limit discovery is a legitimate engineering practice that helps you build more resilient integrations. The best API consumers are the ones who understand and respect the limits.
Top comments (0)