DEV Community

Atlas Whoff
Atlas Whoff

Posted on

How to Rate Limit AI API Routes in Next.js: Protect Your Budget from Abuse

The Rate Limit That Protects Your AI Budget

AI API calls are expensive. One user with a script, one bug in your frontend, one bad actor -- and your monthly bill spikes before you notice.

Here's how to add per-user rate limiting to your AI routes so one user can't burn your entire Anthropic/OpenAI budget.

Why AI Routes Need Special Limits

Standard API call cost: ~0.001 cents
Claude Sonnet call (1k tokens in, 1k out): ~1.8 cents
GPT-4o call (1k tokens in, 1k out): ~0.5 cents

At 1000 requests from one user:
Standard API: $0.01 -- irrelevant
Claude Sonnet: $18.00 -- noticeable
GPT-4o: $5.00 -- noticeable

At 10,000 requests (runaway script, malicious actor):
Claude Sonnet: $180 in minutes
GPT-4o: $50 in minutes
Enter fullscreen mode Exit fullscreen mode

The Two-Layer Approach

// lib/ai-rate-limit.ts

// Layer 1: Short burst limit (prevents rapid-fire abuse)
// Layer 2: Daily budget limit (prevents sustained drain)

import { rateLimit } from './rate-limit'

/**
 * Applies the two-layer AI rate limit to a single user.
 *
 * The burst layer (10 requests per minute) is checked first; a request that
 * fails it returns immediately, so the daily limiter is never invoked for it.
 * Requests that pass are then checked against the daily layer (100 requests
 * per 24-hour window).
 *
 * @param userId - Identifier used to namespace both limiter keys.
 * @returns `{ allowed: true, remaining }` carrying the daily limiter's
 *   `remaining` value, or `{ allowed: false, error, retryAfter }` where
 *   `retryAfter` is the length of the violated window in seconds.
 */
export async function checkAIRateLimit(userId: string) {
  // Single source of truth for each window, so the limiter configuration and
  // the `retryAfter` hint returned to the caller cannot drift apart.
  const burstWindowMs = 60_000
  const dailyWindowMs = 24 * 60 * 60_000

  // Layer 1: 10 requests per minute (burst)
  const burst = rateLimit({
    key: `ai:burst:${userId}`,
    limit: 10,
    windowMs: burstWindowMs
  })

  if (!burst.success) {
    return {
      allowed: false,
      error: 'Too many requests. Please wait a minute before trying again.',
      retryAfter: burstWindowMs / 1000
    }
  }

  // Layer 2: 100 requests per 24-hour window (budget)
  const daily = rateLimit({
    key: `ai:daily:${userId}`,
    limit: 100,
    windowMs: dailyWindowMs
  })

  if (!daily.success) {
    return {
      allowed: false,
      // The window is 24 hours long, not aligned to the calendar day, so the
      // message must not promise a reset at midnight.
      error: 'Daily AI request limit reached. Limit resets within 24 hours.',
      retryAfter: dailyWindowMs / 1000
    }
  }

  return { allowed: true, remaining: daily.remaining }
}
Enter fullscreen mode Exit fullscreen mode

Plan-Based Limits

/**
 * Per-plan AI request quotas.
 *
 * `burst` is the number of requests allowed per one-minute window and `daily`
 * is the number allowed per 24-hour window. `as const` keeps the keys and
 * values as literal types so `keyof typeof AI_LIMITS` is the plan-name union.
 */
const AI_LIMITS = {
  free: {
    burst: 5,
    daily: 20
  },
  pro: {
    burst: 30,
    daily: 500
  },
  enterprise: {
    burst: 100,
    daily: 10000
  }
} as const

/**
 * Applies the burst and daily AI rate limits that belong to a user's plan.
 *
 * The burst (per-minute) limiter is consulted first; a request that fails it
 * returns immediately, so the daily limiter is never invoked for it.
 *
 * @param userId - Identifier used to namespace both limiter keys.
 * @param plan - Plan name selecting the quotas from `AI_LIMITS`. A value that
 *   is not a key of `AI_LIMITS` at runtime is treated as `'free'`.
 * @returns `{ allowed: true, remaining }` carrying the daily limiter's
 *   `remaining` value, or `{ allowed: false, error, retryAfter }` where
 *   `retryAfter` is the length of the violated window in seconds.
 */
export async function checkPlanAILimit(
  userId: string,
  plan: keyof typeof AI_LIMITS
) {
  const burstWindowMs = 60_000
  const dailyWindowMs = 24 * 60 * 60_000

  // The static type cannot be trusted when the caller produced `plan` with a
  // cast (e.g. from a database column). An unknown name would make
  // `AI_LIMITS[plan]` undefined and crash below, so fail closed to the most
  // restrictive tier instead.
  const isKnownPlan = Object.prototype.hasOwnProperty.call(AI_LIMITS, plan)
  const effectivePlan: keyof typeof AI_LIMITS = isKnownPlan ? plan : 'free'
  const limits = AI_LIMITS[effectivePlan]

  const burst = rateLimit({
    key: `ai:burst:${userId}`,
    limit: limits.burst,
    windowMs: burstWindowMs
  })

  if (!burst.success) {
    return {
      allowed: false,
      error: `Rate limit: ${limits.burst} requests/minute on ${effectivePlan} plan`,
      retryAfter: burstWindowMs / 1000
    }
  }

  const daily = rateLimit({
    key: `ai:daily:${userId}`,
    limit: limits.daily,
    windowMs: dailyWindowMs
  })

  if (!daily.success) {
    return {
      allowed: false,
      error: `Daily limit: ${limits.daily} AI calls/day on ${effectivePlan} plan. Upgrade for more.`,
      retryAfter: dailyWindowMs / 1000
    }
  }

  return { allowed: true, remaining: daily.remaining }
}
Enter fullscreen mode Exit fullscreen mode

Wiring Into the AI Route

// app/api/generate/route.ts
import { NextRequest, NextResponse } from 'next/server'
import { getServerSession } from 'next-auth'
import { authOptions } from '@/lib/auth'
import { checkPlanAILimit } from '@/lib/ai-rate-limit'
import { db } from '@/lib/db'
import Anthropic from '@anthropic-ai/sdk'

// Shared SDK client, created once per module load and reused across requests.
// NOTE(review): constructed with no options, so credentials presumably come
// from the SDK's default environment lookup — confirm for your deployment.
const anthropic = new Anthropic()

/**
 * POST /api/generate — runs a prompt through the model on behalf of the
 * signed-in user.
 *
 * Order of checks: authentication, plan-based rate limit, then request-body
 * validation. The rate limit runs before the body is parsed so that throttled
 * callers are rejected as early as possible.
 *
 * Responses:
 * - 401 when there is no session.
 * - 429 when `checkPlanAILimit` denies the request.
 * - 400 when the body is not valid JSON or `prompt` is not a non-empty string.
 * - 200 with `{ result, remaining }` otherwise; `result` is the text of the
 *   first content block, or `''` when that block is missing or not text.
 */
export async function POST(req: NextRequest) {
  const session = await getServerSession(authOptions)
  if (!session) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })

  // Get user's plan; users without a subscription row are treated as 'free'
  const user = await db.user.findUnique({
    where: { id: session.user.id },
    include: { subscription: true }
  })
  const plan = (user?.subscription?.plan ?? 'free') as 'free' | 'pro' | 'enterprise'

  // Check rate limit
  const check = await checkPlanAILimit(session.user.id, plan)
  if (!check.allowed) {
    return NextResponse.json(
      { error: check.error },
      { status: 429 }
    )
  }

  // req.json() rejects on an empty or malformed body; report that as a client
  // error instead of letting it surface as an unhandled 500.
  let body: unknown
  try {
    body = await req.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // Never forward unvalidated input to a paid API: require a non-empty string.
  const prompt =
    typeof body === 'object' && body !== null
      ? (body as { prompt?: unknown }).prompt
      : undefined
  if (typeof prompt !== 'string' || prompt.trim() === '') {
    return NextResponse.json(
      { error: 'prompt must be a non-empty string' },
      { status: 400 }
    )
  }

  const message = await anthropic.messages.create({
    model: 'claude-sonnet-4-6',
    max_tokens: 1024,
    messages: [{ role: 'user', content: prompt }]
  })

  // Guard against an empty content array before reading the first block.
  const firstBlock = message.content[0]

  return NextResponse.json({
    result: firstBlock?.type === 'text' ? firstBlock.text : '',
    remaining: check.remaining
  })
}
Enter fullscreen mode Exit fullscreen mode

Token-Based Tracking (Advanced)

// Track actual token spend, not just request count

// Check the monthly spend limit BEFORE calling the model. Checking afterwards
// would still incur the cost of the call and then discard its result with a
// 429, which protects nothing.
const monthlySpend = await db.aiUsage.aggregate({
  where: {
    userId: session.user.id,
    createdAt: { gte: startOfMonth }
  },
  _sum: { costCents: true }
})

// `_sum.costCents` is nullish-coalesced to 0 for users with no usage rows yet.
if ((monthlySpend._sum.costCents ?? 0) > USER_MONTHLY_LIMIT_CENTS) {
  return NextResponse.json({ error: 'Monthly AI budget exceeded' }, { status: 429 })
}

const message = await anthropic.messages.create({ ... })

// Log usage so the next request's budget check sees this call's cost
await db.aiUsage.create({
  data: {
    userId: session.user.id,
    inputTokens: message.usage.input_tokens,
    outputTokens: message.usage.output_tokens,
    model: 'claude-sonnet-4-6',
    // 0.003 and 0.015 are dollars per 1,000 input/output tokens. Dividing the
    // weighted sum by 10 converts it to cents (÷1000 tokens, ×100 cents).
    // Math.ceil rounds every call up to a whole cent, so totals err high.
    costCents: Math.ceil(
      (message.usage.input_tokens * 0.003 + message.usage.output_tokens * 0.015) / 10
    ) // Approximate, update with current pricing
  }
})
Enter fullscreen mode Exit fullscreen mode

Pre-Built in the AI SaaS Starter Kit

Rate limiting for AI routes, plan-based limits, and usage tracking -- all included.

$99 one-time at whoffagents.com

Top comments (0)