Building APIs that can handle growth is one of the biggest challenges in software development. In this guide, we'll explore architectural patterns that help your APIs scale from hundreds to millions of requests.
Understanding API Scalability
Scalability isn't just about handling more traffic—it's about maintaining performance, reliability, and developer experience as your system grows.
Key Metrics to Track
- Requests per second (RPS): How many requests your API handles
- Response time (p50, p95, p99): How fast your API responds
- Error rate: Percentage of failed requests
- Availability: Uptime percentage (99.9% = 8.76 hours downtime/year)
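To make these numbers visible from day one, you can measure them at the edge of your API. Below is a minimal sketch that assumes an existing Express app and keeps durations in memory; in a real deployment you would export them to a metrics system (Prometheus, StatsD, a hosted APM) rather than logging them.

// Sketch: track request durations in memory and report RPS and p95 latency.
// Assumes an existing Express `app`; swap the console.log for a metrics exporter.
const durations = [];

app.use((req, res, next) => {
  const start = Date.now();
  res.on('finish', () => durations.push(Date.now() - start));
  next();
});

function percentile(values, p) {
  const sorted = [...values].sort((a, b) => a - b);
  return sorted[Math.max(0, Math.ceil((p / 100) * sorted.length) - 1)];
}

// Report once per minute, then reset the window
setInterval(() => {
  if (durations.length === 0) return;
  console.log({ rps: durations.length / 60, p95: percentile(durations, 95) });
  durations.length = 0;
}, 60000);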
Starting Simple: The Monolith
Don't start with microservices. A well-designed monolith can handle significant scale:
┌─────────────────────────────────────────┐
│              Load Balancer              │
└─────────────────┬───────────────────────┘
                  │
    ┌─────────────┴─────────────┐
    │                           │
┌───▼───┐                   ┌───▼───┐
│ App 1 │                   │ App 2 │
└───┬───┘                   └───┬───┘
    │                           │
    └─────────────┬─────────────┘
                  │
          ┌───────▼───────┐
          │   Database    │
          └───────────────┘
Monolith Best Practices
// Organize code by domain, not by type

// Good structure
src/
  users/
    controller.js
    service.js
    repository.js
    routes.js
  orders/
    controller.js
    service.js
    repository.js
    routes.js

// Avoid this
src/
  controllers/
  services/
  repositories/
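To see how the domain-oriented layout comes together, here is a rough sketch of an entry point; the mount paths, the port, and the assumption that each routes.js exports an Express router are illustrative, not prescribed by the structure above.

// app.js — a sketch of wiring the domain modules together
const express = require('express');
const userRoutes = require('./users/routes');   // assumes each routes.js exports an express.Router()
const orderRoutes = require('./orders/routes');

const app = express();
app.use(express.json());

// Each domain owns its routes, controllers, services, and data access
app.use('/users', userRoutes);
app.use('/orders', orderRoutes);

app.listen(3000); // port is illustrative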
Adding Caching Layers
Caching is your first line of defense against high load:
const Redis = require('ioredis');
const redis = new Redis();

async function getCachedUser(userId) {
  // Try cache first
  const cached = await redis.get(`user:${userId}`);
  if (cached) {
    return JSON.parse(cached);
  }

  // Fetch from database
  const user = await db.users.findById(userId);

  // Cache for 5 minutes (skip caching if the user doesn't exist)
  if (user) {
    await redis.setex(`user:${userId}`, 300, JSON.stringify(user));
  }
  return user;
}
Cache Invalidation Strategies
- Time-based (TTL): Simple but may serve stale data
- Write-through: Update cache on writes
- Event-driven: Invalidate on specific events
// Event-driven cache invalidation
eventBus.on('user.updated', async (userId) => {
  await redis.del(`user:${userId}`);
  await redis.del(`user:${userId}:profile`);
});
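A write-through version is sketched below; the db.users.update call is an assumption standing in for whatever your data layer looks like, and the point is simply that the cache is refreshed in the same code path that performs the write.

// Write-through: refresh the cache as part of the write path
async function updateUserProfile(userId, changes) {
  const user = await db.users.update(userId, changes); // illustrative data-layer call

  // Readers hit the fresh copy instead of waiting for the TTL to expire
  await redis.setex(`user:${userId}`, 300, JSON.stringify(user));
  return user;
}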
Database Scaling Strategies
Read Replicas
const { Pool } = require('pg');

const primaryPool = new Pool({
  host: 'primary.db.example.com',
  // ... config
});

const replicaPool = new Pool({
  host: 'replica.db.example.com',
  // ... config
});

// Route reads to the replica, writes to the primary
async function getUser(id) {
  return replicaPool.query('SELECT * FROM users WHERE id = $1', [id]);
}

async function updateUser(id, data) {
  return primaryPool.query('UPDATE users SET name = $1 WHERE id = $2', [data.name, id]);
}
Database Sharding
For massive scale, partition data across multiple databases:
function getShardKey(userId) {
  // Simple modulo-based sharding (note: this is not consistent hashing,
  // so adding a shard means rebalancing existing keys)
  return userId % NUMBER_OF_SHARDS;
}

function getConnection(userId) {
  const shardId = getShardKey(userId);
  return connectionPools[shardId];
}
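If you want true consistent hashing, so that adding a shard only moves a small fraction of keys instead of reshuffling everything, a bare-bones hash ring might look like the sketch below; the hash function, ring representation, and virtual-node count are illustrative choices.

const crypto = require('crypto');

// Minimal hash ring: each shard owns many points on a circle of hash values
const VIRTUAL_NODES = 100;
const ring = []; // sorted array of { point, shardId }

function hashToPoint(value) {
  const hex = crypto.createHash('md5').update(String(value)).digest('hex');
  return parseInt(hex.slice(0, 8), 16);
}

function addShard(shardId) {
  for (let i = 0; i < VIRTUAL_NODES; i++) {
    ring.push({ point: hashToPoint(`${shardId}:${i}`), shardId });
  }
  ring.sort((a, b) => a.point - b.point);
}

function getShardForKey(key) {
  const point = hashToPoint(key);
  // Walk clockwise to the first ring point at or after the key's point
  const node = ring.find((n) => n.point >= point) || ring[0];
  return node.shardId;
}

With a ring like this, scaling out only migrates the keys that now map to the new shard's points.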
Moving to Microservices
When your monolith becomes too complex, consider breaking it apart:
┌─────────────────────────────────────────────────────┐
│                     API Gateway                     │
└─────────────────────┬───────────────────────────────┘
                      │
    ┌─────────────────┼─────────────────┐
    │                 │                 │
┌───▼───┐        ┌────▼────┐       ┌────▼─────┐
│ Users │        │ Orders  │       │ Payments │
│Service│        │ Service │       │ Service  │
└───┬───┘        └────┬────┘       └────┬─────┘
    │                 │                 │
┌───▼───┐        ┌────▼────┐       ┌────▼─────┐
│ Users │        │ Orders  │       │ Payments │
│  DB   │        │   DB    │       │    DB    │
└───────┘        └─────────┘       └──────────┘
API Gateway Pattern
// Simple API Gateway with Express
const express = require('express');
const { createProxyMiddleware } = require('http-proxy-middleware');

const app = express();

// Route each path prefix to the appropriate service
app.use('/api/users', createProxyMiddleware({
  target: 'http://users-service:3001',
  changeOrigin: true
}));

app.use('/api/orders', createProxyMiddleware({
  target: 'http://orders-service:3002',
  changeOrigin: true
}));

app.use('/api/payments', createProxyMiddleware({
  target: 'http://payments-service:3003',
  changeOrigin: true
}));
Service Communication
Synchronous (REST/gRPC)
// REST call between services (assumes Node 18+ with global fetch)
async function getUserOrders(userId) {
  // The two calls are independent, so issue them in parallel
  const [userRes, ordersRes] = await Promise.all([
    fetch(`http://users-service/users/${userId}`),
    fetch(`http://orders-service/orders?userId=${userId}`)
  ]);

  return {
    user: await userRes.json(),
    orders: await ordersRes.json()
  };
}
Asynchronous (Message Queues)
// Using RabbitMQ
const amqp = require('amqplib');

// Shared connection (the URL is illustrative)
let connection;
async function getConnection() {
  if (!connection) {
    connection = await amqp.connect('amqp://localhost');
  }
  return connection;
}

// Publisher (Orders Service)
async function publishOrderCreated(order) {
  const channel = await (await getConnection()).createChannel();
  await channel.assertExchange('orders', 'topic', { durable: true });
  channel.publish(
    'orders',
    'order.created',
    Buffer.from(JSON.stringify(order))
  );
}

// Consumer (Notifications Service)
async function consumeOrders() {
  const channel = await (await getConnection()).createChannel();
  await channel.assertExchange('orders', 'topic', { durable: true });
  await channel.assertQueue('notifications-queue', { durable: true });
  await channel.bindQueue('notifications-queue', 'orders', 'order.created');

  channel.consume('notifications-queue', (msg) => {
    if (!msg) return;
    const order = JSON.parse(msg.content.toString());
    sendOrderConfirmationEmail(order);
    channel.ack(msg);
  });
}
Resilience Patterns
Circuit Breaker
const CircuitBreaker = require('opossum');

const options = {
  timeout: 3000,                // consider a call failed after 3s
  errorThresholdPercentage: 50, // open the circuit at 50% failures
  resetTimeout: 30000           // try a test request again after 30s
};

const breaker = new CircuitBreaker(callExternalService, options);

breaker.fallback(() => {
  return { status: 'service unavailable', cached: getCachedData() };
});

breaker.on('open', () => console.log('Circuit opened'));
breaker.on('close', () => console.log('Circuit closed'));

async function getDataWithBreaker() {
  return breaker.fire();
}
Retry with Exponential Backoff
async function retryWithBackoff(fn, maxRetries = 3) {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt === maxRetries - 1) throw error;

      // Exponential backoff: 1s, 2s, 4s, ... capped at 10s
      const delay = Math.min(1000 * Math.pow(2, attempt), 10000);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}
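As a usage sketch, inside an async request handler the retry helper simply wraps the flaky call; the URL and error handling here are illustrative.

// Illustrative call site for retryWithBackoff
const orders = await retryWithBackoff(async () => {
  const res = await fetch(`http://orders-service/orders?userId=${userId}`);
  if (!res.ok) throw new Error(`Orders service returned ${res.status}`);
  return res.json();
});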
Monitoring and Observability
Essential Logging
const winston = require('winston');

const logger = winston.createLogger({
  format: winston.format.json(),
  transports: [
    new winston.transports.Console()
  ]
});

// Structured logging (inside a request handler or middleware)
logger.info('API request processed', {
  requestId: req.id,
  method: req.method,
  path: req.path,
  duration: Date.now() - startTime,
  statusCode: res.statusCode
});
Health Checks
app.get('/health', async (req, res) => {
  const checks = {
    database: await checkDatabase(),
    redis: await checkRedis(),
    externalApi: await checkExternalApi()
  };
  const isHealthy = Object.values(checks).every(c => c.status === 'up');

  res.status(isHealthy ? 200 : 503).json({
    status: isHealthy ? 'healthy' : 'unhealthy',
    timestamp: new Date().toISOString(),
    checks
  });
});
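The individual checks are left undefined above; one of them might look roughly like this, reusing the pg pool from the read-replica example and returning the { status: 'up' | 'down' } shape the handler expects.

// Sketch of a single dependency check
async function checkDatabase() {
  try {
    await primaryPool.query('SELECT 1');
    return { status: 'up' };
  } catch (error) {
    return { status: 'down', error: error.message };
  }
}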
Conclusion
Scaling APIs is a journey, not a destination:
- Start simple with a well-structured monolith
- Add caching early to reduce database load
- Scale your database with replicas and eventually sharding
- Extract services only when complexity demands it
- Build resilience with circuit breakers and retries
- Monitor everything to catch issues early
Remember: premature optimization is the root of all evil. Scale when you need to, not before.
Originally published at APIVerve Blog