Claude API Streaming: Real-Time AI Responses in Next.js with the Vercel AI SDK
Waiting 10 seconds for an AI response kills UX. Streaming shows output as it generates.
Here's how to implement it correctly with Claude in Next.js.
Why Streaming Matters
Without streaming: user waits 8-15 seconds, sees nothing, then gets full response.
With streaming: first token appears in ~500ms, text flows in real-time.
Perceived speed is dramatically better even though total time is the same.
Option 1: Vercel AI SDK (Recommended)
npm install ai @ai-sdk/anthropic @anthropic-ai/sdk
// app/api/chat/route.ts
import { anthropic } from '@ai-sdk/anthropic'
import { streamText } from 'ai'

// Allow streaming responses to run for up to 30 seconds on Vercel.
export const maxDuration = 30

// POST /api/chat — forwards the chat history to Claude and streams the
// reply back using the AI SDK's data-stream protocol (consumed by useChat).
export async function POST(request: Request) {
  const body = await request.json()
  const { messages } = body

  const systemPrompt = 'You are a helpful assistant for developers.'
  const result = await streamText({
    model: anthropic('claude-sonnet-4-6'),
    messages,
    system: systemPrompt,
  })

  return result.toDataStreamResponse()
}
// components/Chat.tsx
'use client'
import { useChat } from 'ai/react'

// Streaming chat UI. useChat owns the message list, the input field state,
// and the POST to /api/chat, re-rendering as tokens arrive from the stream.
export function Chat() {
  const chat = useChat({ api: '/api/chat' })
  const { messages, input, handleInputChange, handleSubmit, isLoading } = chat

  // Render one bubble per message; user messages align right, Claude's left.
  const bubbles = messages.map(m => {
    const alignment = m.role === 'user' ? 'text-right' : 'text-left'
    return (
      <div key={m.id} className={alignment}>
        <span className="inline-block p-3 rounded-lg bg-gray-100">{m.content}</span>
      </div>
    )
  })

  return (
    <div>
      <div className="space-y-4">
        {bubbles}
        {isLoading && <div className="animate-pulse">Thinking...</div>}
      </div>
      <form onSubmit={handleSubmit} className="mt-4 flex gap-2">
        <input
          value={input}
          onChange={handleInputChange}
          placeholder="Ask anything..."
          className="flex-1 border rounded px-3 py-2"
        />
        <button type="submit" disabled={isLoading}>Send</button>
      </form>
    </div>
  )
}
Option 2: Direct Anthropic SDK
// app/api/generate/route.ts
import Anthropic from '@anthropic-ai/sdk'

const client = new Anthropic()

// POST /api/generate — streams Claude's reply as raw text chunks.
// Fixes vs. the naive version: errors from the upstream Anthropic stream are
// propagated via controller.error() (otherwise the HTTP response hangs forever),
// and cancel() aborts the upstream stream when the client disconnects.
export async function POST(request: Request) {
  const { prompt } = await request.json()

  const stream = await client.messages.stream({
    model: 'claude-sonnet-4-6',
    max_tokens: 1024,
    messages: [{ role: 'user', content: prompt }],
  })

  const readable = new ReadableStream({
    async start(controller) {
      const encoder = new TextEncoder()
      try {
        for await (const chunk of stream) {
          // Only text deltas carry streamed output; skip bookkeeping events
          // (message_start, content_block_start, message_delta, ...).
          if (chunk.type === 'content_block_delta' && chunk.delta.type === 'text_delta') {
            controller.enqueue(encoder.encode(chunk.delta.text))
          }
        }
        controller.close()
      } catch (err) {
        // Surface upstream API failures to the HTTP client instead of stalling.
        controller.error(err)
      }
    },
    cancel() {
      // Client went away: stop the upstream stream so we stop paying for tokens.
      stream.abort()
    },
  })

  return new Response(readable, {
    headers: { 'Content-Type': 'text/plain; charset=utf-8' },
  })
}
// Client-side consumption
'use client'

// Reads the plain-text stream from /api/generate and appends each chunk to
// React state as it arrives. `setOutput` is the state setter from the
// enclosing component (e.g. const [output, setOutput] = useState('')).
async function streamGenerate(prompt: string) {
  const response = await fetch('/api/generate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ prompt }),
  })
  // Fail fast instead of asserting non-null on a missing/error body.
  if (!response.ok || !response.body) {
    throw new Error(`Request failed: ${response.status}`)
  }

  const reader = response.body.getReader()
  const decoder = new TextDecoder()
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    // stream: true buffers partial multi-byte UTF-8 sequences across reads,
    // so a chunk boundary in the middle of a character can't corrupt text.
    setOutput(prev => prev + decoder.decode(value, { stream: true }))
  }
}
Tool Use with Streaming
import { streamText, tool } from 'ai'
import { z } from 'zod'

// Fragment: assumes `anthropic` and `messages` are in scope as in Option 1.
const result = await streamText({
  model: anthropic('claude-sonnet-4-6'),
  messages,
  tools: {
    getWeather: tool({
      description: 'Get current weather for a city',
      // Zod schema — the SDK validates Claude's tool-call arguments against it.
      parameters: z.object({ city: z.string() }),
      execute: async ({ city }) => {
        // Stub result for the article; replace with a real weather API call.
        return { temperature: 72, condition: 'Sunny', city }
      },
    }),
  },
  maxSteps: 3, // allow tool → response cycles
})
Cost Management
// Fragment: assumes `anthropic` and `trimmedMessages` are in scope.
// Pick the cheapest model that handles the task and cap spend on both sides.
const result = await streamText({
  model: anthropic('claude-haiku-4-5-20251001'), // fast + cheap for simple tasks
  maxTokens: 500, // cap output tokens
  messages: trimmedMessages, // trim old messages to save input tokens
})
Claude Haiku is ~20x cheaper than Sonnet and fast enough for most streaming use cases.
The AI SaaS Starter Kit ships with the Vercel AI SDK configured for Claude streaming, including chat UI, tool use scaffolding, and cost controls. $99 one-time.
Top comments (0)