Rate Limiting Strategies for Public APIs: Token Bucket, Sliding Window, and Redis
Without rate limiting, one bad actor can take down your entire API.
Here are the algorithms and implementations that hold up in production.
Why Rate Limiting Matters
- Prevent abuse and DDoS
- Protect downstream services (databases, third-party APIs)
- Enable fair usage across customers
- Cost control (LLM tokens, SMS credits)
Algorithm 1: Fixed Window
// Fixed window counter: simplest algorithm, but allows up to 2x the limit
// in a burst straddling a window boundary.
async function fixedWindowLimit(
identifier: string,
limit: number,
windowSeconds: number
): Promise<{ allowed: boolean; remaining: number; reset: number }> {
// All requests in the same window share one counter key.
const windowKey = Math.floor(Date.now() / (windowSeconds * 1000))
const key = `ratelimit:fixed:${identifier}:${windowKey}`
// INCR + EXPIRE in one pipeline. The original `if (count === 1) expire`
// pattern is not atomic: a crash between INCR and EXPIRE leaves a key
// with no TTL that counts against the client forever. Re-setting the TTL
// on every hit is harmless because the key is scoped to a single window.
const results = await redis
.multi()
.incr(key)
.expire(key, windowSeconds)
.exec()
const count = results?.[0]?.[1] as number
// Epoch ms at which the next window starts (i.e. when the count resets).
const reset = (windowKey + 1) * windowSeconds * 1000
return {
allowed: count <= limit,
remaining: Math.max(0, limit - count),
reset,
}
}
Algorithm 2: Sliding Window (Recommended)
// Sliding window log: more accurate than fixed windows — smooths the burst
// at window boundaries by counting individual request timestamps.
async function slidingWindowLimit(
identifier: string,
limit: number,
windowMs: number
): Promise<{ allowed: boolean; remaining: number }> {
const now = Date.now()
const windowStart = now - windowMs
const key = `ratelimit:sliding:${identifier}`
// Random suffix keeps members unique when two requests share the same ms.
const member = `${now}-${Math.random()}`
// Atomic pipeline: prune, record, count, refresh TTL.
const results = await redis
.multi()
.zremrangebyscore(key, 0, windowStart) // drop entries older than the window
.zadd(key, now, member) // record current request
.zcard(key) // count entries now in window
.expire(key, Math.ceil(windowMs / 1000))
.exec()
const count = results?.[2]?.[1] as number
const allowed = count <= limit
// Bug fix: the original left rejected requests in the sorted set, so a
// client hammering a saturated window kept it saturated indefinitely and
// could be locked out long after slowing down. Denied requests should not
// consume quota — remove the member we just added.
if (!allowed) await redis.zrem(key, member)
return {
allowed,
remaining: Math.max(0, limit - count),
}
}
Algorithm 3: Token Bucket
// Best for bursty traffic — allows burst up to bucket size
// Token bucket: the bucket holds up to `bucketSize` tokens and refills at
// `refillRate` tokens/second; each request spends `tokensRequired` tokens.
// NOTE(review): this GET → compute → SETEX sequence is a read-modify-write
// race — two concurrent requests can both read the same state and each
// spend the same tokens. Production use needs a Lua script (EVAL) or
// WATCH/MULTI to make the update atomic — TODO confirm acceptable here.
async function tokenBucketLimit(
identifier: string,
bucketSize: number,
refillRate: number, // tokens per second
tokensRequired = 1
): Promise<{ allowed: boolean; tokens: number }> {
const key = `ratelimit:bucket:${identifier}`
// Seconds (not ms), to match refillRate's tokens-per-second unit.
const now = Date.now() / 1000
const data = await redis.get(key)
// First request for this identifier starts with a full bucket.
let tokens = bucketSize
let lastRefill = now
if (data) {
const parsed = JSON.parse(data)
// Refill proportionally to elapsed time, capped at the bucket size.
const elapsed = now - parsed.lastRefill
tokens = Math.min(bucketSize, parsed.tokens + elapsed * refillRate)
lastRefill = now
}
if (tokens < tokensRequired) {
// Denied: state is NOT persisted, so the next call recomputes refill
// from the previously stored lastRefill — no tokens are lost.
return { allowed: false, tokens: Math.floor(tokens) }
}
tokens -= tokensRequired
// 3600s TTL garbage-collects buckets for idle identifiers.
await redis.setex(key, 3600, JSON.stringify({ tokens, lastRefill }))
return { allowed: true, tokens: Math.floor(tokens) }
}
Middleware for Next.js
// middleware.ts
// Next.js middleware: rate-limit every request before it reaches a route.
export async function middleware(request: NextRequest) {
// Identify the caller: API key first, then client IP, then a shared bucket.
const identifier = request.headers.get('x-api-key') ??
request.ip ??
'anonymous'
const { allowed, remaining } = await slidingWindowLimit(
`api:${identifier}`,
100, // 100 requests
60_000 // per minute
)
if (!allowed) {
// Bug fix: the original set X-RateLimit-Remaining only on the success
// response, so 429s carried no rate-limit header for clients to read.
return NextResponse.json(
{ error: 'Rate limit exceeded', retryAfter: 60 },
{
status: 429,
headers: {
'Retry-After': '60',
'X-RateLimit-Remaining': String(remaining),
},
}
)
}
const response = NextResponse.next()
response.headers.set('X-RateLimit-Remaining', String(remaining))
return response
}
Tiered Limits by Plan
// Per-plan quotas: requests allowed per window (ms). All tiers share a
// one-minute window; only the request count differs.
const PLAN_LIMITS = {
free: { requests: 100, window: 60_000 },
pro: { requests: 1000, window: 60_000 },
enterprise: { requests: 10000, window: 60_000 },
}
// Look up the caller's plan and apply that plan's sliding-window quota.
async function planAwareLimiter(apiKey: string) {
const user = await getUserByApiKey(apiKey)
// Bug fix: indexing PLAN_LIMITS with an unknown/missing plan yields
// undefined, and `limits.requests` below would then throw. Unrecognized
// plans fall back to the free tier.
const limits =
PLAN_LIMITS[user.plan as keyof typeof PLAN_LIMITS] ?? PLAN_LIMITS.free
return slidingWindowLimit(
`api:${user.id}`,
limits.requests,
limits.window
)
}
Using Upstash Ratelimit (Managed)
npm install @upstash/ratelimit @upstash/redis
import { Ratelimit } from '@upstash/ratelimit'
import { Redis } from '@upstash/redis'
// Managed alternative: Upstash's hosted limiter with the same sliding-window
// semantics. Reads UPSTASH_REDIS_REST_URL / _TOKEN from the environment.
const ratelimit = new Ratelimit({
redis: Redis.fromEnv(),
limiter: Ratelimit.slidingWindow(100, '1 m'), // 100 requests per minute
analytics: true,
})
// `identifier` is the per-caller key (API key, user id, or IP) — supplied
// by the surrounding handler; not defined in this snippet.
const { success, remaining } = await ratelimit.limit(identifier)
The MCP Security Scanner checks MCP servers for missing rate limiting — one of the most common vulnerabilities in production MCP deployments. $29 one-time.
Top comments (0)