Why Custom Monitoring
Off-the-shelf monitoring tools are powerful but heavy. For a video platform like DailyWatch running on shared LiteSpeed hosting, we needed a lightweight Go binary that checks endpoint health, tracks uptime, and exposes Prometheus metrics -- without installing a full monitoring stack.
Health Check Configuration
Define what to monitor:
package monitor

import "time"

// CheckConfig describes a single endpoint to probe.
type CheckConfig struct {
    Name       string        `json:"name"`
    URL        string        `json:"url"`
    Method     string        `json:"method"`
    Interval   time.Duration `json:"interval"`
    Timeout    time.Duration `json:"timeout"`
    ExpectCode int           `json:"expect_code"`
    ExpectBody string        `json:"expect_body"`
}

// CheckResult is the outcome of one probe.
type CheckResult struct {
    Name       string        `json:"name"`
    URL        string        `json:"url"`
    Status     string        `json:"status"` // "up" or "down"
    StatusCode int           `json:"status_code"`
    Latency    time.Duration `json:"latency"`
    Error      string        `json:"error,omitempty"`
    CheckedAt  time.Time     `json:"checked_at"`
}

var DefaultChecks = []CheckConfig{
    {
        Name:       "homepage",
        URL:        "https://dailywatch.video",
        Method:     "GET",
        Interval:   5 * time.Minute,
        Timeout:    10 * time.Second,
        ExpectCode: 200,
    },
    {
        Name:       "search_api",
        URL:        "https://dailywatch.video/search?q=test&format=json",
        Method:     "GET",
        Interval:   10 * time.Minute,
        Timeout:    15 * time.Second,
        ExpectCode: 200,
        ExpectBody: "results",
    },
    {
        Name:       "sitemap",
        URL:        "https://dailywatch.video/sitemap.xml",
        Method:     "GET",
        Interval:   30 * time.Minute,
        Timeout:    10 * time.Second,
        ExpectCode: 200,
        ExpectBody: "<urlset",
    },
}
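The json tags on CheckConfig hint at loading checks from a file instead of hardcoding them. Here is a minimal sketch of such a loader; the durationJSON wrapper, the fileCheck mirror struct, and the LoadChecks helper are illustrative assumptions, not part of the code above. The wrapper exists because encoding/json decodes time.Duration as raw nanosecond integers, not strings like "5m":

package monitor

import (
    "encoding/json"
    "os"
    "time"
)

// durationJSON is a hypothetical wrapper so durations can be written as
// strings like "5m" in JSON rather than nanosecond integers.
type durationJSON struct {
    time.Duration
}

func (d *durationJSON) UnmarshalJSON(b []byte) error {
    var s string
    if err := json.Unmarshal(b, &s); err != nil {
        return err
    }
    parsed, err := time.ParseDuration(s)
    if err != nil {
        return err
    }
    d.Duration = parsed
    return nil
}

// fileCheck mirrors CheckConfig but uses string durations.
type fileCheck struct {
    Name       string       `json:"name"`
    URL        string       `json:"url"`
    Method     string       `json:"method"`
    Interval   durationJSON `json:"interval"`
    Timeout    durationJSON `json:"timeout"`
    ExpectCode int          `json:"expect_code"`
    ExpectBody string       `json:"expect_body"`
}

// LoadChecks reads check definitions from a JSON file, falling back to
// DefaultChecks when the file does not exist.
func LoadChecks(path string) ([]CheckConfig, error) {
    data, err := os.ReadFile(path)
    if err != nil {
        if os.IsNotExist(err) {
            return DefaultChecks, nil
        }
        return nil, err
    }
    var raw []fileCheck
    if err := json.Unmarshal(data, &raw); err != nil {
        return nil, err
    }
    checks := make([]CheckConfig, 0, len(raw))
    for _, fc := range raw {
        checks = append(checks, CheckConfig{
            Name: fc.Name, URL: fc.URL, Method: fc.Method,
            Interval: fc.Interval.Duration, Timeout: fc.Timeout.Duration,
            ExpectCode: fc.ExpectCode, ExpectBody: fc.ExpectBody,
        })
    }
    return checks, nil
}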
The Health Checker
Each check is a single HTTP probe; an RWMutex makes the checker safe to call from many goroutines at once, which matters because the scheduler below runs one goroutine per check:
package monitor

import (
    "context"
    "fmt"
    "io"
    "net/http"
    "strings"
    "sync"
    "time"
)

// HealthChecker runs checks and keeps the latest result plus a capped
// history per check. All methods are safe for concurrent use.
type HealthChecker struct {
    client  *http.Client
    results map[string]*CheckResult
    history map[string][]CheckResult
    mu      sync.RWMutex
}

func NewHealthChecker() *HealthChecker {
    return &HealthChecker{
        client:  &http.Client{},
        results: make(map[string]*CheckResult),
        history: make(map[string][]CheckResult),
    }
}

// RunCheck performs one HTTP probe and records the result. The timeout is
// applied per request via a context; mutating the shared client's Timeout
// field would be a data race once checks run from multiple goroutines.
func (hc *HealthChecker) RunCheck(cfg CheckConfig) CheckResult {
    ctx, cancel := context.WithTimeout(context.Background(), cfg.Timeout)
    defer cancel()

    start := time.Now()
    req, err := http.NewRequestWithContext(ctx, cfg.Method, cfg.URL, nil)
    if err != nil {
        return hc.recordResult(cfg.Name, CheckResult{
            Name: cfg.Name, URL: cfg.URL, Status: "down",
            Error:     fmt.Sprintf("request creation failed: %v", err),
            CheckedAt: time.Now(),
        })
    }
    req.Header.Set("User-Agent", "DailyWatch-Monitor/1.0")

    resp, err := hc.client.Do(req)
    latency := time.Since(start)
    if err != nil {
        return hc.recordResult(cfg.Name, CheckResult{
            Name: cfg.Name, URL: cfg.URL, Status: "down",
            Latency: latency, Error: err.Error(), CheckedAt: time.Now(),
        })
    }
    defer resp.Body.Close()

    result := CheckResult{
        Name:       cfg.Name,
        URL:        cfg.URL,
        StatusCode: resp.StatusCode,
        Latency:    latency,
        CheckedAt:  time.Now(),
    }

    // Check the status code.
    if cfg.ExpectCode > 0 && resp.StatusCode != cfg.ExpectCode {
        result.Status = "down"
        result.Error = fmt.Sprintf("expected %d, got %d", cfg.ExpectCode, resp.StatusCode)
        return hc.recordResult(cfg.Name, result)
    }

    // Check the body content, reading at most 1MB.
    if cfg.ExpectBody != "" {
        body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
        if !strings.Contains(string(body), cfg.ExpectBody) {
            result.Status = "down"
            result.Error = fmt.Sprintf("body missing expected string: %s", cfg.ExpectBody)
            return hc.recordResult(cfg.Name, result)
        }
    }

    result.Status = "up"
    return hc.recordResult(cfg.Name, result)
}

// recordResult stores the latest result and appends it to the history.
func (hc *HealthChecker) recordResult(name string, result CheckResult) CheckResult {
    hc.mu.Lock()
    defer hc.mu.Unlock()
    hc.results[name] = &result
    hc.history[name] = append(hc.history[name], result)
    // Keep the last 100 results per check.
    if len(hc.history[name]) > 100 {
        hc.history[name] = hc.history[name][len(hc.history[name])-100:]
    }
    return result
}

// LastResult returns the most recent result for a named check, if any.
func (hc *HealthChecker) LastResult(name string) (CheckResult, bool) {
    hc.mu.RLock()
    defer hc.mu.RUnlock()
    r, ok := hc.results[name]
    if !ok {
        return CheckResult{}, false
    }
    return *r, true
}

// GetUptime returns the percentage of checks within the window that came
// back "up". No samples in the window reads as 0% uptime.
func (hc *HealthChecker) GetUptime(name string, window time.Duration) float64 {
    hc.mu.RLock()
    defer hc.mu.RUnlock()
    cutoff := time.Now().Add(-window)
    total, up := 0, 0
    for _, r := range hc.history[name] {
        if r.CheckedAt.After(cutoff) {
            total++
            if r.Status == "up" {
                up++
            }
        }
    }
    if total == 0 {
        return 0
    }
    return float64(up) / float64(total) * 100
}
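A quick way to sanity-check this logic is to point the checker at an httptest stub. This is a hypothetical test file (TestRunCheck is not part of the monitor package above), using only the standard library:

package monitor

import (
    "net/http"
    "net/http/httptest"
    "testing"
    "time"
)

func TestRunCheck(t *testing.T) {
    // A stub server returning a body the check can match against.
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Write([]byte(`{"results": []}`))
    }))
    defer srv.Close()

    hc := NewHealthChecker()
    cfg := CheckConfig{
        Name:       "stub",
        URL:        srv.URL,
        Method:     "GET",
        Timeout:    2 * time.Second,
        ExpectCode: 200,
        ExpectBody: "results",
    }

    if got := hc.RunCheck(cfg); got.Status != "up" {
        t.Fatalf("expected up, got %s (%s)", got.Status, got.Error)
    }

    // A mismatched body expectation should flip the status to down.
    cfg.ExpectBody = "no-such-string"
    if got := hc.RunCheck(cfg); got.Status != "down" {
        t.Fatalf("expected down, got %s", got.Status)
    }

    // Both results land in history: one up, one down, so 50% uptime.
    if up := hc.GetUptime("stub", time.Hour); up != 50 {
        t.Fatalf("expected 50%% uptime, got %.1f", up)
    }
}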
Prometheus Metrics Endpoint
Expose the latest recorded results in Prometheus text format for scraping:
package main

import (
    "fmt"
    "net/http"
    "time"

    "myapp/monitor"
)

func metricsHandler(checker *monitor.HealthChecker, checks []monitor.CheckConfig) http.HandlerFunc {
    return func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "text/plain; version=0.0.4")
        // HELP and TYPE must appear exactly once per metric name.
        fmt.Fprint(w, "# HELP video_platform_up Whether the endpoint is up (1) or down (0)\n")
        fmt.Fprint(w, "# TYPE video_platform_up gauge\n")
        fmt.Fprint(w, "# HELP video_platform_latency_ms Latency of the last check in milliseconds\n")
        fmt.Fprint(w, "# TYPE video_platform_latency_ms gauge\n")
        fmt.Fprint(w, "# HELP video_platform_uptime_24h Percentage of successful checks over the last 24h\n")
        fmt.Fprint(w, "# TYPE video_platform_uptime_24h gauge\n")
        for _, cfg := range checks {
            // Serve the last recorded result rather than probing on every
            // scrape, so a slow endpoint cannot stall the scrape itself.
            result, ok := checker.LastResult(cfg.Name)
            if !ok {
                continue // no check has completed yet
            }
            upVal := 0
            if result.Status == "up" {
                upVal = 1
            }
            uptime24h := checker.GetUptime(cfg.Name, 24*time.Hour)
            fmt.Fprintf(w, "video_platform_up{name=%q,url=%q} %d\n", cfg.Name, cfg.URL, upVal)
            fmt.Fprintf(w, "video_platform_latency_ms{name=%q} %.1f\n", cfg.Name, float64(result.Latency.Milliseconds()))
            fmt.Fprintf(w, "video_platform_uptime_24h{name=%q} %.2f\n", cfg.Name, uptime24h)
        }
    }
}
func main() {
    checker := monitor.NewHealthChecker()
    checks := monitor.DefaultChecks

    // Run each check on its own schedule so the configured Interval is
    // actually honored, rather than one loop sleeping a flat 5 minutes.
    for _, cfg := range checks {
        go func(cfg monitor.CheckConfig) {
            ticker := time.NewTicker(cfg.Interval)
            defer ticker.Stop()
            for {
                result := checker.RunCheck(cfg)
                if result.Status == "down" {
                    fmt.Printf("ALERT: %s is DOWN - %s\n", result.Name, result.Error)
                }
                <-ticker.C
            }
        }(cfg)
    }

    http.HandleFunc("/metrics", metricsHandler(checker, checks))
    http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
        w.Write([]byte("OK"))
    })
    fmt.Println("Monitor running on :9090")
    if err := http.ListenAndServe(":9090", nil); err != nil {
        fmt.Println("server error:", err)
    }
}
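Once the first round of checks has completed, a scrape of /metrics looks roughly like this (the latency and uptime numbers are illustrative):

# HELP video_platform_up Whether the endpoint is up (1) or down (0)
# TYPE video_platform_up gauge
# HELP video_platform_latency_ms Latency of the last check in milliseconds
# TYPE video_platform_latency_ms gauge
# HELP video_platform_uptime_24h Percentage of successful checks over the last 24h
# TYPE video_platform_uptime_24h gauge
video_platform_up{name="homepage",url="https://dailywatch.video"} 1
video_platform_latency_ms{name="homepage"} 184.0
video_platform_uptime_24h{name="homepage"} 100.00
video_platform_up{name="search_api",url="https://dailywatch.video/search?q=test&format=json"} 1
video_platform_latency_ms{name="search_api"} 412.0
video_platform_uptime_24h{name="search_api"} 99.31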
Scrape with Prometheus
Add to your prometheus.yml:
scrape_configs:
  - job_name: 'dailywatch'
    scrape_interval: 5m
    static_configs:
      - targets: ['localhost:9090']
This monitoring setup keeps us informed about the health of DailyWatch using nothing beyond the Go standard library. The single binary uses under 10MB of RAM and catches downtime within minutes. When an endpoint goes down, we see it in Grafana dashboards and can set up Alertmanager rules for Slack or email notifications.
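For the Alertmanager route, a minimal sketch of an alerting rule against these metrics might look like this; the file name, group name, and 10m hold time are placeholders to tune for your own intervals:

# alerts.yml -- reference it from prometheus.yml via rule_files.
groups:
  - name: dailywatch
    rules:
      - alert: EndpointDown
        expr: video_platform_up == 0
        for: 10m
        labels:
          severity: critical
        annotations:
          summary: "{{ $labels.name }} is down ({{ $labels.url }})"

With 5-minute checks and 5-minute scrapes, a 10m hold time means roughly two consecutive failures before the alert fires, which filters out one-off blips.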
This article is part of the Building DailyWatch series. Check out DailyWatch to see these techniques in action.