NGINX Has a Free API: Here's How to Use It for Load Balancer Automation

#nginx #devops #api #webdev

NGINX Plus ships a commercial REST API, but open-source NGINX can still be automated through its configuration files, graceful reloads, and the stub_status module. If you need a true REST control API, NGINX Unit (a separate application server from the same project) can be configured entirely over HTTP.

Why Automate NGINX?

Dynamic upstream management with graceful, zero-downtime reloads
Monitor connections, requests, and response codes
Automate SSL certificate rotation
Build self-healing load balancers

Getting Started: Status Monitoring

Enable stub_status in nginx.conf:

# Dedicated server block that exposes NGINX's basic connection counters.
server {
    listen 8080;
    location /nginx_status {
        stub_status;        # serve the plain-text status page at this location
        allow 127.0.0.1;    # only requests from the local host are allowed
        deny all;           # every other client is rejected
    }
}

# Query the status location from the local host; the commented lines below are sample output.
curl -s http://localhost:8080/nginx_status
# Active connections: 42
# server accepts handled requests
# 7368 7368 10993
# Reading: 0 Writing: 1 Waiting: 41

Parse NGINX Metrics with Python

import requests
import re

def get_nginx_metrics(status_url='http://localhost:8080/nginx_status', timeout=5):
    """Fetch a stub_status page and return its counters as a dict.

    Args:
        status_url: URL of a location that has ``stub_status`` enabled.
        timeout: Seconds to wait for the HTTP response, so a hung NGINX
            cannot block the caller forever.

    Returns:
        A dict of integers with the keys ``active_connections``,
        ``total_accepts``, ``total_handled``, ``total_requests``,
        ``reading``, ``writing``, ``waiting`` and ``dropped``
        (accepted minus handled connections).

    Raises:
        requests.exceptions.RequestException: The request failed, timed out,
            or returned an HTTP error status (e.g. 403 from ``deny all``).
        ValueError: The response body is not stub_status output.
    """
    response = requests.get(status_url, timeout=timeout)
    # Without this, an error page would be parsed as if it were status output.
    response.raise_for_status()
    text = response.text

    def find(pattern, flags=0):
        match = re.search(pattern, text, flags)
        if match is None:
            raise ValueError(
                f"Unexpected stub_status output from {status_url}: {text[:200]!r}"
            )
        return match

    active = int(find(r'Active connections:\s*(\d+)').group(1))
    # The three totals sit alone on their own line; anchor to that line so the
    # pattern cannot pick up digits from anywhere else in the body.
    accepts, handled, requests_total = map(
        int, find(r'^\s*(\d+)\s+(\d+)\s+(\d+)\s*$', re.MULTILINE).groups()
    )
    reading = int(find(r'Reading:\s*(\d+)').group(1))
    writing = int(find(r'Writing:\s*(\d+)').group(1))
    waiting = int(find(r'Waiting:\s*(\d+)').group(1))

    return {
        'active_connections': active,
        'total_accepts': accepts,
        'total_handled': handled,
        'total_requests': requests_total,
        'reading': reading,
        'writing': writing,
        'waiting': waiting,
        'dropped': accepts - handled,
    }

# Fetch the current counters and print them as an aligned "name: value" table.
metrics = get_nginx_metrics()
for name in metrics:
    print(f"{name:25s}: {metrics[name]}")

NGINX Unit: True REST API

NGINX Unit is a modern app server with a full REST API:

# Deploy a Python app: PUT a complete configuration (a listener on *:8300 that
# passes requests to the "my-python-app" application) through the control socket
curl -X PUT --data '{
  "listeners": {
    "*:8300": {
      "pass": "applications/my-python-app"
    }
  },
  "applications": {
    "my-python-app": {
      "type": "python 3",
      "path": "/www/my-app/",
      "module": "wsgi",
      "processes": 4
    }
  }
}' --unix-socket /var/run/control.unit.sock http://localhost/config/

# Get current config (pretty-printed with jq)
curl --unix-socket /var/run/control.unit.sock http://localhost/config/ | jq .

# Scale up processes: replace only the app's "processes" value (4 -> 8)
curl -X PUT --data '8' --unix-socket /var/run/control.unit.sock http://localhost/config/applications/my-python-app/processes

Dynamic Upstream Management

import subprocess
import os

def _restore_upstreams(config_path, previous):
    """Put *config_path* back to *previous*, or delete it if it did not exist."""
    if previous is None:
        if os.path.exists(config_path):
            os.remove(config_path)
    else:
        with open(config_path, 'w') as f:
            f.write(previous)


def update_upstreams(servers, config_path='/etc/nginx/conf.d/upstreams.conf'):
    """Write the ``backend`` upstream block for *servers* and reload NGINX.

    The new file is validated with ``nginx -t`` before reloading. If the
    validation fails (or cannot be run), the previous file contents are
    restored so a bad config is never left on disk for the next reload.

    Args:
        servers: Iterable of dicts with ``host`` and ``port`` keys and an
            optional ``weight`` (defaults to 1).
        config_path: File that receives the generated upstream block.

    Returns:
        True if the config was written, validated and reloaded; False if the
        server list was empty, validation failed, or the reload failed.
    """
    servers = list(servers)
    if not servers:
        # An upstream block without servers is always rejected by nginx;
        # refuse early instead of touching the live config.
        print("Refusing to write an upstream block with no servers")
        return False

    lines = ["upstream backend {\n"]
    lines.extend(
        f"    server {server['host']}:{server['port']} weight={server.get('weight', 1)};\n"
        for server in servers
    )
    lines.append("}\n")
    upstream_block = "".join(lines)

    # Remember the current contents so a rejected config can be rolled back.
    try:
        with open(config_path) as f:
            previous = f.read()
    except FileNotFoundError:
        previous = None

    with open(config_path, 'w') as f:
        f.write(upstream_block)

    try:
        result = subprocess.run(['nginx', '-t'], capture_output=True, text=True)
    except OSError:
        # nginx binary missing or not executable: undo the write, then report.
        _restore_upstreams(config_path, previous)
        raise

    if result.returncode != 0:
        _restore_upstreams(config_path, previous)
        print(f"Config test failed: {result.stderr}")
        return False

    reload_result = subprocess.run(
        ['nginx', '-s', 'reload'], capture_output=True, text=True
    )
    if reload_result.returncode != 0:
        # The file itself is valid, so keep it; only the signal failed.
        print(f"Reload failed: {reload_result.stderr}")
        return False

    print(f"Updated upstreams: {len(servers)} servers")
    return True

# Prune unhealthy backends: rebuild the upstream pool from servers that pass a health check
def health_check_and_update(servers):
    """Probe each server's /health endpoint and keep only the healthy ones.

    A server counts as healthy when ``GET http://host:port/health`` answers
    with status 200 within two seconds. The healthy subset is handed to
    ``update_upstreams``; if no server is healthy, nothing is rewritten and a
    critical message is printed instead.
    """
    def passes_check(backend):
        # Report the outcome for this backend and tell the caller whether to keep it.
        address = f"{backend['host']}:{backend['port']}"
        try:
            reply = requests.get(f"http://{address}/health", timeout=2)
        except requests.exceptions.RequestException:
            print(f"Down: {address}")
            return False
        if reply.status_code != 200:
            print(f"Unhealthy: {address} (status {reply.status_code})")
            return False
        return True

    alive = [backend for backend in servers if passes_check(backend)]

    if not alive:
        print("CRITICAL: No healthy upstream servers!")
        return

    update_upstreams(alive)

# Candidate backends (all on port 8080) with their relative weights.
servers = [
    {'host': host, 'port': 8080, 'weight': weight}
    for host, weight in (('10.0.1.1', 3), ('10.0.1.2', 2), ('10.0.1.3', 1))
]
health_check_and_update(servers)

SSL Certificate Automation

import subprocess
from datetime import datetime
import ssl
import socket

def check_ssl_expiry(domain, port=443, timeout=10):
    """Return the number of whole days until *domain*'s TLS certificate expires.

    Args:
        domain: Host name to connect to; also used for SNI and verification.
        port: TCP port of the TLS service.
        timeout: Seconds to wait for the TCP connection and handshake, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        Days left, rounded down (negative once the certificate has expired).

    Raises:
        ssl.SSLCertVerificationError: The certificate does not verify
            (this includes a certificate that is already expired).
        OSError: The connection could not be established or timed out.
    """
    import time  # local import: only this function needs it

    context = ssl.create_default_context()
    with socket.create_connection((domain, port), timeout=timeout) as sock:
        with context.wrap_socket(sock, server_hostname=domain) as ssock:
            cert = ssock.getpeercert()

    # 'notAfter' is expressed in GMT. Convert it to an epoch timestamp and
    # compare against the current epoch time, so neither the local time zone
    # nor the locale's month names can skew the result.
    expiry_ts = ssl.cert_time_to_seconds(cert['notAfter'])
    return int((expiry_ts - time.time()) // 86400)

def auto_renew_certs(domains, threshold_days=30):
    """Renew certificates that expire soon and reload NGINX afterwards.

    Each domain is handled independently: a domain that cannot be reached is
    reported and skipped rather than aborting the whole run. A certificate
    that fails verification (for example because it has already expired) is
    treated as due for renewal, since that is when renewal matters most.

    Args:
        domains: Iterable of certificate/domain names known to certbot.
        threshold_days: Renew when fewer than this many days remain.
    """
    for domain in domains:
        try:
            days = check_ssl_expiry(domain)
        except ssl.SSLCertVerificationError as exc:
            # Must be caught before OSError, of which it is a subclass.
            print(f"{domain}: certificate failed verification ({exc}); attempting renewal")
            days = None
        except OSError as exc:
            print(f"{domain}: could not check certificate ({exc}); skipping")
            continue
        else:
            print(f"{domain}: {days} days until expiry")

        if days is not None and days >= threshold_days:
            continue

        print(f"  Renewing certificate for {domain}...")
        result = subprocess.run(
            ['certbot', 'renew', '--cert-name', domain, '--quiet'],
            capture_output=True, text=True
        )
        if result.returncode != 0:
            print(f"  Renewal failed: {result.stderr}")
            continue

        reload_result = subprocess.run(
            ['nginx', '-s', 'reload'], capture_output=True, text=True
        )
        if reload_result.returncode == 0:
            print("  Renewed and reloaded!")
        else:
            # The new certificate is on disk but NGINX is still serving the old one.
            print(f"  Renewed, but nginx reload failed: {reload_result.stderr}")

# Check both domains; any certificate with fewer than 30 days left (the default threshold) is renewed.
auto_renew_certs(['example.com', 'api.example.com'])

Log Analysis

import re
from collections import Counter

def analyze_access_log(log_path='/var/log/nginx/access.log', lines=10000):
    """Print the most frequent status codes, request paths and client IPs.

    Only the first *lines* lines of *log_path* are read. Lines that do not
    match the expected access-log layout are ignored. The report lists the
    top 5 status codes, top 10 paths and top 10 IPs.
    """
    entry_re = re.compile(r'(\S+) .+ \[(.+)\] "(\w+) (.+?) .+" (\d+) (\d+)')

    by_status = Counter()
    by_path = Counter()
    by_ip = Counter()

    with open(log_path) as log_file:
        for index, entry in enumerate(log_file):
            if index >= lines:
                break
            parsed = entry_re.match(entry)
            if parsed is None:
                continue
            # Groups: 1 = client IP, 4 = request path, 5 = status code.
            by_status[parsed.group(5)] += 1
            by_path[parsed.group(4)] += 1
            by_ip[parsed.group(1)] += 1

    report = (
        ("Top Status Codes:", by_status, 5),
        ("\nTop Paths:", by_path, 10),
        ("\nTop IPs:", by_ip, 10),
    )
    for heading, tally, limit in report:
        print(heading)
        for label, hits in tally.most_common(limit):
            print(f"  {label}: {hits}")

# Summarize the default access log (/var/log/nginx/access.log, first 10000 lines).
analyze_access_log()

Real-World Use Case

A SaaS company built an auto-healing load balancer with NGINX. When health checks detected a failing backend, Python scripts automatically removed it from the upstream pool and triggered a container restart. When the backend recovered, it was re-added. Downtime dropped from 5 minutes (manual intervention) to 15 seconds (automated).