Content marketing needs a constant stream of fresh articles, but writing quality content is time-consuming. I built an automated article factory that uses AI models to generate, validate, and publish blog articles for DailyWatch. Here's the architecture.
System Overview
[Topic Queue] -> [AI Generator] -> [Validator] -> [Import Queue] -> [Publisher] -> [Live Blog]
The pipeline runs as a series of scripts, each handling one stage.
Stage 1: Topic Queue
Topics are defined in a configuration file with metadata:
{
"topics": [
{
"slug": "cryptocurrency",
"keywords": ["bitcoin", "ethereum", "crypto trading", "blockchain"],
"tone": "informative",
"word_count": [800, 1200],
"max_articles": 10
},
{
"slug": "web-development",
"keywords": ["PHP", "JavaScript", "web performance", "SEO"],
"tone": "technical",
"word_count": [600, 1000],
"max_articles": 5
}
]
}
Stage 2: AI Generation
The generator script sends prompts to multiple AI models and collects responses:
import json
import time
from pathlib import Path
def generate_article(topic: dict, model: str) -> dict:
    """Generate a single article using the specified AI model.

    Args:
        topic: Topic configuration. It is passed whole to ``build_prompt``
            and its ``"slug"`` value is copied onto the article.
        model: Identifier of the AI model; forwarded to ``call_ai_api`` and
            recorded on the article.

    Returns:
        A dict with the keys ``title``, ``slug``, ``body``,
        ``meta_description``, ``tags``, ``topic``, ``model``,
        ``generated_at``, ``word_count`` and ``status`` (always
        ``"pending_validation"``).

    Raises:
        KeyError: If ``topic`` has no ``"slug"`` or the response lacks one of
            ``title``, ``body``, ``meta_description`` or ``tags`` (assumes
            the response is a plain mapping -- confirm against
            ``call_ai_api``).
    """
    prompt = build_prompt(topic)
    # Call AI API (Gemini, Claude, GPT, etc.)
    response = call_ai_api(model, prompt)
    article = {
        "title": response["title"],
        "slug": slugify(response["title"]),
        "body": response["body"],
        "meta_description": response["meta_description"],
        "tags": response["tags"],
        "topic": topic["slug"],
        "model": model,
        # The trailing "Z" designates UTC, so format gmtime() explicitly;
        # strftime() without a time tuple would format *local* time.
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        # Whitespace-delimited token count of the raw body.
        "word_count": len(response["body"].split()),
        "status": "pending_validation",
    }
    return article
def build_prompt(topic: dict) -> str:
    """Render the generation prompt for one topic.

    Reads ``slug``, ``word_count`` (items 0 and 1 are used as the lower and
    upper bound), ``tone`` and ``keywords`` from *topic* and returns the
    newline-separated instruction text, ending with the JSON keys the model
    is asked to respond with.
    """
    subject = topic["slug"]
    word_range = topic["word_count"]
    shortest, longest = word_range[0], word_range[1]
    tone = topic["tone"]
    keyword_list = ", ".join(topic["keywords"])

    prompt_lines = (
        f"Write a blog article about {subject}.",
        "Requirements:",
        "- Title: engaging, SEO-friendly",
        f"- Word count: {shortest}-{longest} words",
        f"- Tone: {tone}",
        f"- Include keywords naturally: {keyword_list}",
        "- Structure: introduction, 3-5 sections with headers, conclusion",
        "- Include practical examples or data points",
        "- Do NOT include any false claims or made-up statistics",
        "Respond in JSON format with keys: title, body, meta_description, tags",
    )
    return "\n".join(prompt_lines)
Stage 3: Validation
Every generated article goes through validation before it can be imported:
/**
 * Quality gate for generated articles.
 *
 * validate() runs every check and collects human-readable messages; it never
 * stops at the first failure, so getErrors() lists all problems at once.
 */
class ArticleValidator {
    /** @var string[] Messages collected by the most recent validate() call. */
    private array $errors = [];

    /**
     * Run all checks against a decoded article.
     *
     * @param array $article Decoded article data (title, slug, body,
     *                       meta_description, tags, ...).
     * @return bool True when no check reported an error.
     */
    public function validate(array $article): bool {
        $this->errors = [];
        $this->checkRequiredFields($article);
        $this->checkFieldTypes($article);
        $this->checkWordCount($article);
        $this->checkTitleLength($article);
        $this->checkJsonStructure($article);
        $this->checkForProhibitedContent($article);
        return empty($this->errors);
    }

    /** Report every required field that is absent or empty()-falsy. */
    private function checkRequiredFields(array $article): void {
        $required = ['title', 'slug', 'body', 'meta_description', 'tags'];
        foreach ($required as $field) {
            if (empty($article[$field])) {
                $this->errors[] = "Missing required field: {$field}";
            }
        }
    }

    /**
     * Report text fields whose value is present but not a string.
     *
     * The article comes from model-generated JSON, so a field may decode to
     * an array or number; the string functions used by the other checks
     * would throw a TypeError on an array instead of failing validation.
     */
    private function checkFieldTypes(array $article): void {
        foreach (['title', 'slug', 'body', 'meta_description'] as $field) {
            if (isset($article[$field]) && !is_string($article[$field])) {
                $this->errors[] = "Field must be a string: {$field}";
            }
        }
    }

    /** Require 300-3000 words in the tag-stripped body. */
    private function checkWordCount(array $article): void {
        $wordCount = str_word_count(strip_tags($this->text($article, 'body')));
        if ($wordCount < 300) {
            $this->errors[] = "Article too short: {$wordCount} words (min 300)";
        }
        if ($wordCount > 3000) {
            $this->errors[] = "Article too long: {$wordCount} words (max 3000)";
        }
    }

    /** Require a title of 10-120 characters (multibyte-aware length). */
    private function checkTitleLength(array $article): void {
        $titleLen = mb_strlen($this->text($article, 'title'));
        if ($titleLen < 10 || $titleLen > 120) {
            $this->errors[] = "Title length invalid: {$titleLen} chars (10-120 allowed)";
        }
    }

    /** Require tags to be an array with at least one entry. */
    private function checkJsonStructure(array $article): void {
        if (!is_array($article['tags'] ?? null) || count($article['tags']) < 1) {
            $this->errors[] = "Tags must be a non-empty array";
        }
    }

    /** Reject bodies containing phrases that reveal AI authorship. */
    private function checkForProhibitedContent(array $article): void {
        $body = strtolower($this->text($article, 'body'));
        $prohibited = ['as an ai', 'i cannot', 'language model', 'openai', 'chatgpt'];
        foreach ($prohibited as $phrase) {
            if (str_contains($body, $phrase)) {
                $this->errors[] = "Prohibited AI disclosure phrase found: {$phrase}";
            }
        }
    }

    /**
     * Return a field as a string, or '' when it is missing or not a string.
     *
     * Non-string values are reported separately by checkFieldTypes(); here
     * they are only neutralised so the remaining checks cannot throw.
     */
    private function text(array $article, string $field): string {
        $value = $article[$field] ?? '';
        return is_string($value) ? $value : '';
    }

    /**
     * @return string[] Messages from the most recent validate() call.
     */
    public function getErrors(): array {
        return $this->errors;
    }
}
Stage 4: Import and Publish
/**
 * Moves validated articles from a queue directory into the blog_articles
 * table and archives each processed file under "<queue>/imported/".
 */
class ArticleImporter {
    public function __construct(
        private readonly PDO $db,
        private readonly ArticleValidator $validator,
    ) {}

    /**
     * Import every *.json file found directly inside $queueDir.
     *
     * @param string $queueDir Directory holding the queued article files.
     * @return array{imported:int, invalid:int, errors:string[]} Counters plus
     *         one "<file>: <reason>" message per rejected file.
     */
    public function importFromQueue(string $queueDir): array {
        $stats = ['imported' => 0, 'invalid' => 0, 'errors' => []];

        // glob() returns false on error; treat that as an empty queue.
        $files = glob($queueDir . '/*.json') ?: [];

        foreach ($files as $file) {
            $raw = file_get_contents($file);
            $article = is_string($raw) ? json_decode($raw, true) : null;

            // Anything that is not a non-empty array cannot be an article.
            // Checking the type (not just truthiness) keeps scalar JSON such
            // as "text" or 42 away from validate(array), which would throw.
            if (!is_array($article) || $article === []) {
                $stats['errors'][] = basename($file) . ': Invalid JSON';
                $this->markInvalid($file);
                // Count it like any other rejected file so the totals match
                // the number of INVALID- files produced.
                $stats['invalid']++;
                continue;
            }

            if (!$this->validator->validate($article)) {
                $stats['errors'][] = basename($file) . ': ' . implode(', ', $this->validator->getErrors());
                $this->markInvalid($file);
                $stats['invalid']++;
                continue;
            }

            $this->insertArticle($article);
            $this->moveToImported($file);
            $stats['imported']++;
        }

        return $stats;
    }

    /** Insert one validated article as an immediately published row. */
    private function insertArticle(array $article): void {
        // String literals use single quotes: SQLite treats double-quoted
        // tokens as identifiers and only falls back to strings through a
        // legacy quirk that can be disabled at build or run time.
        $sql = <<<'SQL'
            INSERT INTO blog_articles (title, slug, body, meta_description,
                                       tags, topic, status, published_at)
            VALUES (?, ?, ?, ?, ?, ?, 'published', datetime('now'))
            SQL;

        $this->db->prepare($sql)->execute([
            $article['title'],
            $article['slug'],
            $article['body'],
            $article['meta_description'],
            json_encode($article['tags']),
            $article['topic'] ?? '',
        ]);
    }

    /** Archive a rejected file with an "INVALID-" name prefix. */
    private function markInvalid(string $file): void {
        $this->archive($file, 'INVALID-');
    }

    /** Archive a successfully imported file under its original name. */
    private function moveToImported(string $file): void {
        $this->archive($file, '');
    }

    /**
     * Move $file into the "imported" directory next to it.
     *
     * The directory is created on first use; without it rename() fails and
     * the file would stay in the queue and be processed again on the next
     * run.
     */
    private function archive(string $file, string $prefix): void {
        $dir = dirname($file) . '/imported';
        if (!is_dir($dir)) {
            mkdir($dir, 0775);
        }
        rename($file, $dir . '/' . $prefix . basename($file));
    }
}
Publication Tracking
To avoid publishing too many articles at once (which looks unnatural to search engines), we use a publication tracker:
/**
 * Per-day publication counter stored in the publish_track table
 * (one row per calendar date, keyed by a 'Y-m-d' string).
 */
class PublishTracker {
    /** Creates the publish_track table on first use. */
    public function __construct(private readonly PDO $db) {
        $schema = '
            CREATE TABLE IF NOT EXISTS publish_track (
                date TEXT PRIMARY KEY,
                count INTEGER DEFAULT 0
            )
        ';
        $this->db->exec($schema);
    }

    /**
     * Tell whether today's recorded count is still below the daily limit.
     *
     * @param int $maxPerDay Maximum number of publications allowed per day.
     * @return bool True while fewer than $maxPerDay publications are recorded.
     */
    public function canPublishToday(int $maxPerDay = 3): bool {
        $lookup = $this->db->prepare('SELECT count FROM publish_track WHERE date = ?');
        $lookup->execute([$this->todayKey()]);

        // fetchColumn() yields false when no row exists for today.
        $stored = $lookup->fetchColumn();
        $publishedSoFar = $stored ?: 0;

        return $publishedSoFar < $maxPerDay;
    }

    /** Add one to today's counter, creating the row when it is missing. */
    public function recordPublish(): void {
        $upsert = $this->db->prepare('
            INSERT INTO publish_track (date, count) VALUES (?, 1)
            ON CONFLICT(date) DO UPDATE SET count = count + 1
        ');
        $upsert->execute([$this->todayKey()]);
    }

    /** Current date formatted as the table's key ('Y-m-d'). */
    private function todayKey(): string {
        return date('Y-m-d');
    }
}
Results
The article factory at dailywatch.video has generated and published over 100 articles across multiple topics. Each article is unique, validated for quality, and published on a natural schedule.
The key insight: AI-generated content needs a robust validation pipeline. Without validation, you get hallucinations, formatting issues, and AI disclosure phrases that undermine credibility. The factory approach treats content generation as a pipeline with quality gates rather than as a one-shot generation step.
Top comments (0)