## Introduction

Automatically reviewing user-generated content means detecting harmful material with AI and routing suspicious posts to a human review queue. In this article, we let Claude Code design a staged moderation pipeline from a handful of CLAUDE.md rules.
## CLAUDE.md Rules

```markdown
## Content Moderation Rules
- Two-stage: pattern matching + AI classification
- Three decisions: safe / suspicious / blocked
- Images: AWS Rekognition for nudity/violence
- Human review queue: 24h SLA for suspicious
- False positive target: < 0.1%
```
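The rules imply a result type shared by every moderation stage. A minimal sketch of what that type might look like (the names `Decision` and `ModerationResult` are assumptions, not part of the generated output):

```typescript
// Hypothetical shared result type implied by the rules above.
export type Decision = 'safe' | 'suspicious' | 'blocked';

export interface ModerationResult {
  decision: Decision;
  categories: string[]; // which rule categories were triggered
  confidence: number;   // 0.0–1.0
}
```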
## Generated Implementation
```typescript
// src/moderation/textModerator.ts
// Two-stage: fast pattern matching first, then AI classification.
import Anthropic from '@anthropic-ai/sdk';

const anthropic = new Anthropic(); // reads ANTHROPIC_API_KEY from the environment

export async function moderateText(content: string): Promise<ModerationResult> {
  const patternResult = checkPatterns(content);
  if (patternResult.decision === 'blocked') return patternResult;
  return classifyWithAI(content);
}

// Claude Haiku for fast, cheap classification
async function classifyWithAI(content: string): Promise<ModerationResult> {
  const response = await anthropic.messages.create({
    model: 'claude-haiku-4-5-20251001',
    max_tokens: 200,
    messages: [{
      role: 'user',
      content: `Classify this content. JSON only:\n\n${content.slice(0, 500)}\n\n{"decision": "safe"|"suspicious"|"blocked", "categories": [...], "confidence": 0.0-1.0}`,
    }],
  });
  // Content blocks are a union type; narrow before reading .text.
  const block = response.content[0];
  if (block.type !== 'text') throw new Error('Unexpected response block type');
  return JSON.parse(block.text);
}
```
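The first stage, `checkPatterns`, is referenced above but not shown. A minimal sketch of what it could look like, with purely illustrative regexes (the pattern lists are assumptions, not the real rule set):

```typescript
// Hypothetical first-stage pattern check. The regexes here are placeholder
// examples only; a real deployment would load curated banned-word lists.
type Decision = 'safe' | 'suspicious' | 'blocked';
interface ModerationResult { decision: Decision; categories: string[]; confidence: number; }

const BLOCK_PATTERNS: RegExp[] = [
  /\bfree\s+crypto\s+giveaway\b/i, // example spam pattern (assumption)
];
const SUSPICIOUS_PATTERNS: RegExp[] = [
  /\b(click here|limited offer)\b/i, // example borderline pattern (assumption)
];

export function checkPatterns(content: string): ModerationResult {
  if (BLOCK_PATTERNS.some(p => p.test(content))) {
    return { decision: 'blocked', categories: ['pattern'], confidence: 1.0 };
  }
  if (SUSPICIOUS_PATTERNS.some(p => p.test(content))) {
    return { decision: 'suspicious', categories: ['pattern'], confidence: 0.6 };
  }
  // "safe" here only means "no pattern hit"; the AI stage still runs.
  return { decision: 'safe', categories: [], confidence: 0.5 };
}
```

Because only a `blocked` pattern result short-circuits, a pattern miss still falls through to the AI stage, which is what keeps the false-positive target realistic.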
```typescript
// src/moderation/imageModerator.ts
import { RekognitionClient, DetectModerationLabelsCommand } from '@aws-sdk/client-rekognition';

const rekognition = new RekognitionClient({});

const BLOCK_CATEGORIES = ['Explicit Nudity', 'Violence', 'Hate Symbols'];

export async function moderateImage(imageKey: string): Promise<ModerationResult> {
  const result = await rekognition.send(new DetectModerationLabelsCommand({
    Image: { S3Object: { Bucket: process.env.S3_BUCKET!, Name: imageKey } },
    MinConfidence: 70,
  }));
  // Top-level labels have an empty ParentName, so match on either field.
  const blockLabels = result.ModerationLabels?.filter(l =>
    BLOCK_CATEGORIES.some(cat => l.ParentName === cat || l.Name === cat)
  ) ?? [];
  if (blockLabels.length > 0) {
    return { decision: 'blocked', categories: blockLabels.map(l => l.Name ?? ''), confidence: 0.9 };
  }
  return { decision: 'safe', categories: [], confidence: 0.95 };
}
```
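A post can carry both text and images, so the pipeline needs to collapse several per-channel results into one final decision. A small sketch, assuming a "strictest decision wins" policy (the `mergeResults` helper and severity ranking are assumptions, not part of the generated code):

```typescript
// Merge per-channel moderation results: keep the strictest decision and
// the union of flagged categories. Names here are illustrative.
type Decision = 'safe' | 'suspicious' | 'blocked';
interface ModerationResult { decision: Decision; categories: string[]; confidence: number; }

const SEVERITY: Record<Decision, number> = { safe: 0, suspicious: 1, blocked: 2 };

export function mergeResults(results: ModerationResult[]): ModerationResult {
  let merged: ModerationResult = { decision: 'safe', categories: [], confidence: 1.0 };
  for (const r of results) {
    if (SEVERITY[r.decision] > SEVERITY[merged.decision]) {
      merged = { ...merged, decision: r.decision, confidence: r.confidence };
    }
    merged.categories = [...merged.categories, ...r.categories];
  }
  return merged;
}
```

"Strictest wins" errs toward human review rather than silent publication, which matches the suspicious-queue design below.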
```typescript
// Pipeline integration
switch (finalDecision.decision) {
  case 'blocked':
    await prisma.content.update({ where: { id: contentId }, data: { status: 'hidden' } });
    break;
  case 'suspicious':
    await prisma.reviewQueue.create({
      data: {
        contentId,
        priority: finalDecision.confidence > 0.8 ? 'high' : 'normal',
        dueAt: new Date(Date.now() + 24 * 60 * 60 * 1000), // 24h SLA
      },
    });
    break;
  case 'safe':
    await prisma.content.update({ where: { id: contentId }, data: { status: 'published' } });
    break;
}
```
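The 24h SLA is only enforceable if something watches the queue. The due-date arithmetic can be isolated in a pair of pure helpers so it is testable without a database (`slaDueAt` and `isOverdue` are hypothetical names, not part of the generated code):

```typescript
// SLA helpers: dueAt = enqueue time + 24h; isOverdue compares against "now".
// A scheduled job could query reviewQueue for rows where isOverdue(dueAt)
// holds and escalate them.
const SLA_MS = 24 * 60 * 60 * 1000;

export function slaDueAt(enqueuedAt: Date): Date {
  return new Date(enqueuedAt.getTime() + SLA_MS);
}

export function isOverdue(dueAt: Date, now: Date = new Date()): boolean {
  return now.getTime() > dueAt.getTime();
}
```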
## Summary

- Pattern matching then AI: banned-word checks are fast, and Claude Haiku adds context understanding
- Claude Haiku keeps classification cost-effective (under 100 tokens of JSON per response)
- Suspicious content goes to a human review queue with a 24h SLA
- AWS Rekognition handles image moderation
Review with **Security Pack (¥1,480)**: `/security-check` at prompt-works.jp
myouga (@myougatheaxo) — Axolotl VTuber.