Set up monitoring and alerting - dashboards, alerts, runbooks, and incident response.
| Signal | What it measures | Example metric |
|---|---|---|
| Latency | Time taken to serve a request | p50, p95, p99 |
| Traffic | System demand | requests/sec |
| Errors | Failure rate | error % |
| Saturation | How close resources are to capacity | CPU, memory |
import { Registry, Counter, Histogram, Gauge } from 'prom-client';
// Registry that the metrics in this snippet attach themselves to via `registers`.
const registry = new Registry();

// Counter: a running total of HTTP requests, split by method, path and status.
const httpRequests = new Counter({
  registers: [registry],
  labelNames: ['method', 'path', 'status'],
  name: 'http_requests_total',
  help: 'Total HTTP requests',
});
// Histogram: distribution of request durations, split by method and path.
const httpDuration = new Histogram({
  registers: [registry],
  labelNames: ['method', 'path'],
  // Bucket upper bounds, in seconds (matching the `_seconds` metric name).
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 5],
  name: 'http_request_duration_seconds',
  help: 'HTTP request duration',
});
// Gauge: a point-in-time value that can move up or down (here, open connections).
const activeConnections = new Gauge({
  registers: [registry],
  name: 'active_connections',
  help: 'Number of active connections',
});
// Request-metrics middleware: once a response has finished, count it in
// `httpRequests` and record its duration in `httpDuration`.
app.use((req, res, next) => {
  // startTimer() returns a function that, when called, observes the elapsed
  // time in seconds; it is based on a monotonic clock, so wall-clock
  // adjustments cannot skew the measurement the way Date.now() deltas can.
  const stopTimer = httpDuration.startTimer();

  res.on('finish', () => {
    const { method } = req;
    // `req.route` is only populated when a route handler matched, so requests
    // that fall through (e.g. 404s) have no route pattern. Use a constant
    // fallback instead of an undefined label value; deliberately NOT the raw
    // URL, which would create an unbounded number of label values.
    const path = req.route?.path ?? 'unmatched';

    httpRequests.inc({ method, path, status: res.statusCode });
    stopTimer({ method, path });
  });

  next();
});
# prometheus-rules.yml