DEV Community

agenthustler
agenthustler

Posted on

Scraping API Rate Limits: Discovering Undocumented Constraints with Python

Every API has rate limits. Some document them clearly. Most don't. Whether you're building an integration, stress-testing your own service, or researching API behavior, knowing the actual rate limits saves you from mysterious 429 errors and silent throttling.

This tutorial shows you how to systematically discover and document API rate limits using Python.

Why Rate Limits Matter

Undocumented rate limits cause real problems:

  • Your production integration breaks at 2 AM because you hit an unknown threshold
  • Your scraper gets silently throttled, returning stale cached data instead of errors
  • Your competitor's API wrapper works better because they reverse-engineered the limits

The Rate Limit Discovery Framework

pip install requests
import requests
import time
import json
from datetime import datetime
from dataclasses import dataclass, field

@dataclass
class RateLimitResult:
    """Outcome of probing a single endpoint for rate limits."""
    endpoint: str  # path that was probed, relative to the prober's base_url
    requests_sent: int  # total requests issued during the probe
    first_rejection_at: int | None  # 1-based request count at the first rejection; None if none seen
    rejection_code: int | None  # HTTP status of that rejection (e.g. 429); None if none seen
    headers_found: dict = field(default_factory=dict)  # last rate-limit response headers observed
    observed_window: float = 0.0  # wall-clock seconds the probe ran
    effective_rate: float = 0.0  # requests_sent / observed_window (includes deliberate delays)

class RateLimitProber:
    """Sends a controlled stream of requests to discover an API's rate limits."""

    def __init__(self, base_url: str, auth_headers: dict = None):
        """
        Args:
            base_url: Scheme + host with no trailing slash, e.g. "https://api.example.com".
            auth_headers: Optional headers (e.g. Authorization) applied to every request.
        """
        self.base_url = base_url
        self.session = requests.Session()
        if auth_headers:
            self.session.headers.update(auth_headers)

    def probe_endpoint(self, path: str, method: str = "GET",
                       max_requests: int = 200,
                       delay_ms: int = 50) -> RateLimitResult:
        """Hit ``path`` repeatedly until a rejection is seen or max_requests is reached.

        Args:
            path: Endpoint path appended to base_url.
            method: "GET" probes with GET; anything else probes with an empty-body POST.
            max_requests: Upper bound on requests to send.
            delay_ms: Pause between requests, in milliseconds.

        Returns:
            A RateLimitResult describing when (if ever) the API pushed back.
        """
        url = f"{self.base_url}{path}"
        result = RateLimitResult(endpoint=path, requests_sent=0,
                                 first_rejection_at=None, rejection_code=None)

        start_time = time.monotonic()

        for i in range(max_requests):
            # Keep the try body minimal: only the request itself can time out.
            try:
                if method == "GET":
                    resp = self.session.get(url, timeout=10)
                else:
                    resp = self.session.post(url, json={}, timeout=10)
            except requests.exceptions.Timeout:
                # The request was still sent, so it may count against the quota:
                # record it, and keep pacing so a timeout doesn't cause a burst.
                result.requests_sent = i + 1
                time.sleep(delay_ms / 1000)
                continue

            result.requests_sent = i + 1

            rl_headers = self._extract_rate_headers(resp.headers)
            if rl_headers:
                # Keep the latest snapshot: "Remaining"-style headers count down.
                result.headers_found = rl_headers

            if resp.status_code == 429:
                result.first_rejection_at = i + 1
                result.rejection_code = 429
                break

            # Some APIs reject with 403/503 instead of 429. Only trust those
            # after a warm-up (i > 10) and when the body looks rate-limit-ish,
            # to avoid misreading an auth failure or an outage as a limit.
            if resp.status_code in (403, 503) and i > 10:
                if self._is_rate_limit(resp):
                    result.first_rejection_at = i + 1
                    result.rejection_code = resp.status_code
                    break

            time.sleep(delay_ms / 1000)

        elapsed = time.monotonic() - start_time
        result.observed_window = elapsed
        # Effective rate includes the deliberate inter-request delay.
        result.effective_rate = result.requests_sent / elapsed if elapsed > 0 else 0

        return result

    def _extract_rate_headers(self, headers) -> dict:
        """Collect any standard or de-facto rate-limit headers from a response."""
        rate_headers = {}
        patterns = [
            "X-RateLimit-Limit", "X-RateLimit-Remaining",
            "X-RateLimit-Reset", "Retry-After",
            "RateLimit-Limit", "RateLimit-Remaining", "RateLimit-Reset"
        ]
        for p in patterns:
            val = headers.get(p)
            if val:
                rate_headers[p] = val
        return rate_headers

    def _is_rate_limit(self, resp) -> bool:
        """Heuristic: does the response body mention rate limiting?"""
        body = resp.text.lower()
        indicators = ["rate limit", "too many requests", "throttl", "slow down", "quota exceeded"]
        return any(ind in body for ind in indicators)

Detecting Silent Throttling

Some APIs don't return errors. They just slow down responses:

import statistics

def detect_throttling(base_url: str, path: str, sample_size: int = 100) -> dict:
    """Sample request latencies and compare the early vs. late halves.

    A large jump in the second half's mean latency suggests the API is
    silently throttling: slowing responses down instead of returning errors.

    Args:
        base_url: Scheme + host with no trailing slash.
        path: Endpoint path appended to base_url.
        sample_size: Number of timed requests to send.

    Returns:
        Latency stats in milliseconds plus a ``likely_throttled`` verdict.
    """
    url = f"{base_url}{path}"
    latencies = []

    # Context manager closes the session (and its connection pool) when done.
    with requests.Session() as session:
        for _ in range(sample_size):
            start = time.monotonic()
            session.get(url, timeout=30)
            latencies.append((time.monotonic() - start) * 1000)
            time.sleep(0.05)  # modest pacing: we're measuring, not hammering

    mid = len(latencies) // 2
    # Hoist the half-means: each was recomputed up to three times before.
    first_mean = statistics.mean(latencies[:mid])
    second_mean = statistics.mean(latencies[mid:])

    return {
        "mean_ms": statistics.mean(latencies),
        "p95_ms": sorted(latencies)[int(len(latencies) * 0.95)],
        "first_half_mean": first_mean,
        "second_half_mean": second_mean,
        "slowdown_factor": second_mean / first_mean,
        # 1.5x slowdown between halves is the heuristic throttling threshold.
        "likely_throttled": second_mean > first_mean * 1.5
    }

Window Size Detection

def find_rate_window(prober: RateLimitProber, path: str) -> dict:
    """Estimate the reset window by exhausting the limit, then waiting it out.

    After hitting the limit, tries common fixed windows (1 min, 5 min,
    15 min, 1 h) and reports the first one after which a request succeeds.
    """
    probe = prober.probe_endpoint(path, max_requests=500, delay_ms=10)

    if not probe.first_rejection_at:
        return {"window": "unknown", "note": "No limit found in 500 requests"}

    limit = probe.first_rejection_at

    # Common fixed-window sizes, shortest first.
    for window_sec in (60, 300, 900, 3600):
        print(f"Waiting {window_sec}s to test window reset...")
        time.sleep(window_sec)

        resp = prober.session.get(f"{prober.base_url}{path}", timeout=10)
        if resp.status_code != 429:
            return {
                "limit": limit,
                "window_seconds": window_sec,
                "rate": f"{limit} requests per {window_sec}s",
            }

    return {"limit": limit, "window": ">1 hour"}

Running the Analysis

def analyze_api(base_url: str, endpoints: list[str], auth: dict = None):
    """Probe every endpoint in turn and print a rate-limit summary."""
    prober = RateLimitProber(base_url, auth)
    print(f"Rate Limit Analysis: {base_url}")

    for endpoint in endpoints:
        print(f"\nProbing {endpoint}...")
        outcome = prober.probe_endpoint(endpoint)

        # A None first_rejection_at means we never saw a rejection.
        detected = outcome.first_rejection_at if outcome.first_rejection_at else "None detected"
        print(f"  Requests before rejection: {detected}")
        print(f"  Rejection code: {outcome.rejection_code}")
        print(f"  Rate headers: {json.dumps(outcome.headers_found, indent=2)}")
        print(f"  Effective rate: {outcome.effective_rate:.1f} req/s")

        # Cool down between endpoints so one probe doesn't contaminate the next.
        time.sleep(5)

# Example invocation: replace the host, endpoint paths, and bearer token with
# real values before running — this placeholder URL will not resolve.
analyze_api(
    "https://api.example.com",
    ["/v1/users", "/v1/search", "/v1/data"],
    auth={"Authorization": "Bearer YOUR_TOKEN"}
)

Proxy Rotation for Scale

When probing APIs at scale, you need to test rate limits per-IP vs per-account. ScraperAPI lets you rotate IPs automatically to test whether limits are IP-based or token-based.

For residential IP testing, ThorData provides geo-distributed proxies. Monitor your probing runs with ScrapeOps dashboards.

Ethical Considerations

  • Only probe APIs you have legitimate access to
  • Use reasonable delays between requests
  • The goal is discovery, not denial of service
  • Document findings and share responsibly
  • Respect robots.txt and ToS

Rate limit discovery is a legitimate engineering practice that helps you build more resilient integrations. The best API consumers are the ones who understand and respect the limits.

Top comments (0)