DEV Community

Atlas Whoff
Atlas Whoff

Posted on

Claude API Streaming: Real-Time AI Responses in Next.js with the Vercel AI SDK

Claude API Streaming: Real-Time AI Responses in Next.js with the Vercel AI SDK

Waiting 10 seconds for an AI response kills UX. Streaming shows output as it generates.
Here's how to implement it correctly with Claude in Next.js.

Why Streaming Matters

Without streaming: user waits 8-15 seconds, sees nothing, then gets full response.
With streaming: first token appears in ~500ms, text flows in real-time.

Perceived speed is dramatically better even though total time is the same.

Option 1: Vercel AI SDK (Recommended)

npm install ai @ai-sdk/anthropic @anthropic-ai/sdk
Enter fullscreen mode Exit fullscreen mode
// app/api/chat/route.ts
import { anthropic } from '@ai-sdk/anthropic'
import { streamText } from 'ai'

// Allow streaming responses to run up to 30 s on Vercel before timeout.
export const maxDuration = 30

const SYSTEM_PROMPT = 'You are a helpful assistant for developers.'

// POST /api/chat — streams a Claude completion using the Vercel AI SDK.
export async function POST(request: Request) {
  // The useChat hook posts the running conversation as { messages }.
  const body = await request.json()

  const result = await streamText({
    model: anthropic('claude-sonnet-4-6'),
    system: SYSTEM_PROMPT,
    messages: body.messages,
  })

  // Emit the token stream in the data-stream protocol useChat consumes.
  return result.toDataStreamResponse()
}
Enter fullscreen mode Exit fullscreen mode
// components/Chat.tsx
'use client'

import { useChat } from 'ai/react'

// Streaming chat UI. useChat owns the message list, the input field state,
// and the POST to /api/chat; assistant messages re-render as tokens arrive.
export function Chat() {
  const chat = useChat({ api: '/api/chat' })

  return (
    <div>
      <div className="space-y-4">
        {chat.messages.map(message => {
          // Right-align the user's turns, left-align the assistant's.
          const alignment = message.role === 'user' ? 'text-right' : 'text-left'
          return (
            <div key={message.id} className={alignment}>
              <span className="inline-block p-3 rounded-lg bg-gray-100">
                {message.content}
              </span>
            </div>
          )
        })}
        {chat.isLoading && <div className="animate-pulse">Thinking...</div>}
      </div>

      <form onSubmit={chat.handleSubmit} className="mt-4 flex gap-2">
        <input
          value={chat.input}
          onChange={chat.handleInputChange}
          placeholder="Ask anything..."
          className="flex-1 border rounded px-3 py-2"
        />
        <button type="submit" disabled={chat.isLoading}>Send</button>
      </form>
    </div>
  )
}
Enter fullscreen mode Exit fullscreen mode

Option 2: Direct Anthropic SDK

// app/api/generate/route.ts
import Anthropic from '@anthropic-ai/sdk'

// Reads ANTHROPIC_API_KEY from the environment by default.
const client = new Anthropic()

// POST /api/generate — proxies a single Claude completion as a plain-text
// stream, without the Vercel AI SDK's data-stream framing.
export async function POST(request: Request) {
  const { prompt } = await request.json()

  const stream = await client.messages.stream({
    model: 'claude-sonnet-4-6',
    max_tokens: 1024,
    messages: [{ role: 'user', content: prompt }],
  })

  // Re-expose Anthropic's event stream as raw text chunks.
  const readable = new ReadableStream({
    async start(controller) {
      const encoder = new TextEncoder()
      try {
        for await (const chunk of stream) {
          // Only text deltas carry output tokens; skip start/stop events.
          if (chunk.type === 'content_block_delta' && chunk.delta.type === 'text_delta') {
            controller.enqueue(encoder.encode(chunk.delta.text))
          }
        }
        controller.close()
      } catch (err) {
        // Propagate mid-stream failures (network drop, rate limit) to the
        // client instead of leaving the response hanging open forever.
        controller.error(err)
      }
    },
    cancel() {
      // Stop generating (and paying for) tokens if the client disconnects.
      stream.abort()
    },
  })

  return new Response(readable, {
    headers: { 'Content-Type': 'text/plain; charset=utf-8' },
  })
}
Enter fullscreen mode Exit fullscreen mode
// Client-side consumption
'use client'

// Streams plain-text chunks from /api/generate into React state via
// setOutput (assumed to be a useState setter in the enclosing component).
async function streamGenerate(prompt: string) {
  const response = await fetch('/api/generate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ prompt }),
  })

  // Surface HTTP errors instead of silently reading an error body as output.
  if (!response.ok || !response.body) {
    throw new Error(`generate request failed: ${response.status}`)
  }

  const reader = response.body.getReader()
  const decoder = new TextDecoder()

  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    // { stream: true } buffers partial multi-byte UTF-8 sequences that fall
    // across chunk boundaries; without it such characters get mangled.
    setOutput(prev => prev + decoder.decode(value, { stream: true }))
  }

  // Flush any bytes still buffered inside the decoder.
  const tail = decoder.decode()
  if (tail) setOutput(prev => prev + tail)
}
Enter fullscreen mode Exit fullscreen mode

Tool Use with Streaming

import { streamText, tool } from 'ai'
import { z } from 'zod'

// Tools stream over the same connection: the SDK emits tool calls, runs
// execute(), then streams the model's follow-up text.
const weatherTool = tool({
  description: 'Get current weather for a city',
  parameters: z.object({ city: z.string() }),
  // Stub implementation — swap in a real weather API call.
  execute: async ({ city }) => ({ temperature: 72, condition: 'Sunny', city }),
})

const result = await streamText({
  model: anthropic('claude-sonnet-4-6'),
  messages,
  tools: { getWeather: weatherTool },
  maxSteps: 3,  // allow tool → response cycles
})
Enter fullscreen mode Exit fullscreen mode

Cost Management

// Bound both sides of the token budget: pick the cheapest adequate model,
// cap output with maxTokens, and shrink input by trimming old history.
const budgetModel = anthropic('claude-haiku-4-5-20251001')  // fast + cheap for simple tasks

const result = await streamText({
  model: budgetModel,
  maxTokens: 500,              // cap output tokens
  messages: trimmedMessages,   // trim old messages to save input tokens
})
Enter fullscreen mode Exit fullscreen mode

Claude Haiku is roughly 3x cheaper than Sonnet (per published per-token pricing — verify current rates) and fast enough for most streaming use cases.


The AI SaaS Starter Kit ships with the Vercel AI SDK configured for Claude streaming, including chat UI, tool use scaffolding, and cost controls. $99 one-time.

Top comments (0)