DEV Community

Atlas Whoff
Atlas Whoff

Posted on

Streaming AI Responses in Next.js: Claude, OpenAI, and the Vercel AI SDK

Why Most AI Streaming Implementations Break

You call the OpenAI or Claude API with stream: true. The response streams. But your UI freezes, chunks arrive garbled, or the stream silently cuts off. Here's the correct way.

The Streaming API Route

// app/api/chat/route.ts
import { NextRequest } from 'next/server'
import Anthropic from '@anthropic-ai/sdk'

const anthropic = new Anthropic()

/**
 * Streams Claude's response to the client as raw UTF-8 text chunks.
 *
 * Fixes over the naive version:
 * - One TextEncoder for the whole stream instead of one per chunk.
 * - Errors propagate via controller.error() instead of silently hanging
 *   the response (the "stream silently cuts off" failure mode).
 * - cancel() aborts the upstream Anthropic stream when the client leaves.
 * - Content-Type is text/plain: this route emits raw text, NOT SSE frames,
 *   so advertising text/event-stream would break EventSource clients.
 */
export async function POST(req: NextRequest) {
  const { messages } = await req.json()

  const stream = anthropic.messages.stream({
    model: 'claude-sonnet-4-6',
    max_tokens: 2048,
    messages,
  })

  const encoder = new TextEncoder()

  return new Response(
    new ReadableStream({
      async start(controller) {
        try {
          for await (const chunk of stream) {
            if (chunk.type === 'content_block_delta' &&
                chunk.delta.type === 'text_delta') {
              controller.enqueue(encoder.encode(chunk.delta.text))
            }
          }
          controller.close()
        } catch (err) {
          // Surface upstream failures to the client instead of leaving
          // the connection open forever.
          controller.error(err)
        }
      },
      cancel() {
        // Client disconnected — stop pulling tokens from Anthropic.
        // NOTE(review): MessageStream.abort() per Anthropic SDK — confirm
        // against the installed SDK version.
        stream.abort()
      },
    }),
    {
      headers: {
        // 'Connection: keep-alive' removed: hop-by-hop header, forbidden
        // in HTTP/2 responses.
        'Content-Type': 'text/plain; charset=utf-8',
        'Cache-Control': 'no-cache',
      }
    }
  )
}
Enter fullscreen mode Exit fullscreen mode

Server-Sent Events Format

For more control (metadata, error events, done signals), use SSE format:

/**
 * Wraps a text payload in a Server-Sent Events frame:
 * `data: <json-string>` followed by the blank line that terminates an event.
 * JSON-encoding the payload keeps embedded newlines from splitting the frame.
 */
function encodeSSE(data: string): Uint8Array {
  const frame = `data: ${JSON.stringify(data)}\n\n`
  return new TextEncoder().encode(frame)
}

export async function POST(req: NextRequest) {
  const { messages } = await req.json()

  return new Response(
    new ReadableStream({
      async start(controller) {
        try {
          const stream = anthropic.messages.stream({ model: 'claude-sonnet-4-6', max_tokens: 2048, messages })

          for await (const chunk of stream) {
            if (chunk.type === 'content_block_delta' &&
                chunk.delta.type === 'text_delta') {
              controller.enqueue(encodeSSE(chunk.delta.text))
            }
          }

          controller.enqueue(new TextEncoder().encode('data: [DONE]

'))
        } catch (error) {
          controller.enqueue(encodeSSE(`ERROR: ${String(error)}`))
        } finally {
          controller.close()
        }
      }
    }),
    { headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache' } }
  )
}
Enter fullscreen mode Exit fullscreen mode

React Frontend with useEffect

'use client'

import { useState } from 'react'

/**
 * Chat UI that POSTs to /api/chat and renders the SSE response as it streams.
 *
 * Fixes over the naive version:
 * - A carry-over buffer handles SSE frames split across network reads.
 *   Splitting each chunk on '\n' independently breaks whenever a read
 *   boundary lands mid-line: JSON.parse throws on the partial payload and
 *   text is dropped (the "chunks arrive garbled" failure mode).
 * - try/finally guarantees loading is reset even when fetch or the stream
 *   fails (previously the UI froze in the loading state forever).
 * - res.ok / res.body are checked before reading.
 */
export default function ChatInterface() {
  const [input, setInput] = useState('')
  const [response, setResponse] = useState('')
  const [loading, setLoading] = useState(false)

  // Consume complete `data: ...` lines from `buffer`, appending their
  // payloads to the response. Returns the unconsumed tail (a possibly
  // partial last line) to be prepended to the next read.
  const consumeSSE = (buffer: string): string => {
    const lines = buffer.split('\n')
    const rest = lines.pop() ?? '' // last element may be an incomplete line
    for (const line of lines) {
      if (line.startsWith('data: ') && line !== 'data: [DONE]') {
        try {
          const text = JSON.parse(line.slice(6))
          setResponse(prev => prev + text)
        } catch {
          // Malformed frame — skip it rather than abort the whole stream.
        }
      }
    }
    return rest
  }

  const handleSubmit = async () => {
    if (!input.trim() || loading) return
    setLoading(true)
    setResponse('')

    try {
      const res = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          messages: [{ role: 'user', content: input }]
        })
      })
      if (!res.ok || !res.body) {
        throw new Error(`Request failed with status ${res.status}`)
      }

      const reader = res.body.getReader()
      const decoder = new TextDecoder()
      let buffer = ''

      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        // { stream: true } keeps multi-byte UTF-8 sequences intact when a
        // character straddles two chunks.
        buffer = consumeSSE(buffer + decoder.decode(value, { stream: true }))
      }
      // Flush any bytes the decoder buffered plus a trailing partial line.
      consumeSSE(buffer + decoder.decode() + '\n')
    } catch (err) {
      setResponse(`Error: ${String(err)}`)
    } finally {
      setLoading(false)
    }
  }

  return (
    <div className="flex flex-col gap-4 max-w-2xl mx-auto p-6">
      <div className="min-h-48 bg-gray-800 rounded-lg p-4 whitespace-pre-wrap">
        {response || (loading ? 'Generating...' : 'Response will appear here')}
      </div>
      <div className="flex gap-2">
        <input
          value={input}
          onChange={e => setInput(e.target.value)}
          onKeyDown={e => e.key === 'Enter' && handleSubmit()}
          placeholder="Ask anything..."
          className="flex-1 px-3 py-2 bg-gray-800 rounded-lg"
        />
        <button
          onClick={handleSubmit}
          disabled={loading}
          className="px-4 py-2 bg-blue-600 hover:bg-blue-700 rounded-lg"
        >
          {/* The button is disabled while streaming, so labelling it "Stop"
              was misleading — it cannot abort the request. */}
          {loading ? 'Sending…' : 'Send'}
        </button>
      </div>
    </div>
  )
}
Enter fullscreen mode Exit fullscreen mode

Using Vercel AI SDK (Recommended)

The Vercel AI SDK handles all streaming complexity:

npm install ai @ai-sdk/anthropic
Enter fullscreen mode Exit fullscreen mode
// app/api/chat/route.ts
import { anthropic } from '@ai-sdk/anthropic'
import { streamText } from 'ai'

/**
 * Chat route backed by the Vercel AI SDK, which handles all streaming
 * concerns (chunking, SSE framing, backpressure) for us.
 */
export async function POST(req: Request) {
  const body = await req.json()

  const result = streamText({
    model: anthropic('claude-sonnet-4-6'),
    messages: body.messages,
  })

  return result.toDataStreamResponse()
}
Enter fullscreen mode Exit fullscreen mode
// Frontend with useChat hook
'use client'
import { useChat } from 'ai/react'

/**
 * Minimal chat UI: useChat wires together the input state, the message
 * list, and the POST /api/chat round-trip.
 */
export default function Chat() {
  const chat = useChat()

  return (
    <div>
      {chat.messages.map(message => (
        <div key={message.id}>
          <strong>{message.role}:</strong> {message.content}
        </div>
      ))}
      <form onSubmit={chat.handleSubmit}>
        <input value={chat.input} onChange={chat.handleInputChange} />
        <button type="submit">Send</button>
      </form>
    </div>
  )
}
Enter fullscreen mode Exit fullscreen mode

This Is Pre-Built in the AI SaaS Starter

The AI SaaS Starter Kit ships with streaming chat pre-configured using the Vercel AI SDK, with both Claude and OpenAI routes, conversation history, and a production-ready UI.

$99 one-time at whoffagents.com

Top comments (0)