Activates when working with observability — structured logging, correlation IDs, distributed tracing (OpenTelemetry), metrics (RED/USE), dashboards, alerting rules, log levels, and observability stack configuration
All three signals — logs, metrics, and traces — should be correlated via a shared request/trace ID.
Always log in structured format (JSON in production). Unstructured text logs are impossible to query at scale.
import pino from "pino";
// Paths that must be redacted from every log line (credentials and PII).
const redactedPaths = ["req.headers.authorization", "body.password", "body.ssn"];

// Root logger. LOG_LEVEL overrides the default "info" threshold; the level is
// emitted as its string label and timestamps use pino's ISO time function.
const logger = pino({
  level: process.env.LOG_LEVEL || "info",
  timestamp: pino.stdTimeFunctions.isoTime,
  redact: redactedPaths,
  formatters: {
    level(label) {
      return { level: label };
    },
  },
});
// Create child loggers with bound context: the fields below are attached to
// every line written through requestLogger.
const requestContext = {
  requestId: req.id,
  userId: req.user?.id,
  service: "order-service",
};
const requestLogger = logger.child(requestContext);

// Event-specific fields go in the first argument, the message in the second.
const orderFields = { orderId, itemCount: items.length };
requestLogger.info(orderFields, "Order created");
// Output: {"level":"info","time":"2026-01-15T10:30:00.000Z","requestId":"abc123","userId":"u_456","service":"order-service","orderId":"ord_789","itemCount":3,"msg":"Order created"}
import structlog
# Processor chain, applied in order. Context variables are merged in first and
# the JSON renderer runs last so it serialises the fully populated event.
log_processors = [
    structlog.contextvars.merge_contextvars,
    structlog.processors.add_log_level,
    structlog.processors.TimeStamper(fmt="iso"),
    structlog.processors.StackInfoRenderer(),
    structlog.processors.format_exc_info,
    structlog.processors.JSONRenderer(),
]
structlog.configure(processors=log_processors)

logger = structlog.get_logger()

# Bind context for the request lifecycle
structlog.contextvars.bind_contextvars(request_id=request_id, user_id=user_id)

# Event name first, event-specific fields as keyword arguments.
logger.info("order_created", order_id=order_id, item_count=len(items))
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
Level: slog.LevelInfo,
}))
logger = logger.With(
slog.String("service", "order-service"),
slog.String("version", version),
)
logger.Info("order created",
slog.String("order_id", orderID),
slog.Int("item_count", len(items)),
slog.Duration("duration", elapsed),
)
| Level | Purpose | Example |
|---|---|---|
| FATAL | Application cannot continue; process will exit | Database connection pool exhausted, unrecoverable state |
| ERROR | Operation failed; requires attention | Payment processing failed, external API returned 5xx |
| WARN | Something unexpected but handled; may indicate a problem | Retry succeeded after failure, deprecated API called, cache miss fallback |
| INFO | Significant business events; normal operation milestones | Order placed, user signed up, deployment started |
| DEBUG | Detailed diagnostic information for troubleshooting | SQL query with params, HTTP request/response bodies, cache hit/miss |
| TRACE | Very fine-grained; rarely enabled in production | Function entry/exit, loop iterations |
Rules:
Assign a unique request ID at the edge (API gateway, load balancer) and propagate it through all services.
// Middleware — assign or forward correlation ID
app.use((req, res, next) => {
  const forwardedId = req.headers["x-request-id"];
  if (forwardedId) {
    // An upstream hop already assigned an ID: keep it so the chain stays linked.
    req.id = forwardedId;
  } else {
    // Missing or empty header: this service is the edge, so mint a new ID.
    req.id = crypto.randomUUID();
  }
  // Echo the ID back so callers can quote it when reporting a problem.
  res.setHeader("x-request-id", req.id);
  next();
});
// Pass to downstream services: forward the same ID so their logs can be joined
// with this request's logs.
const correlationHeaders = { "x-request-id": req.id };
const response = await fetch("https://inventory-service/check", {
  headers: correlationHeaders,
});
import { NodeSDK } from "@opentelemetry/sdk-node";
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
// Resolve the OTLP/HTTP trace endpoint.
// - OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is signal-specific and is used verbatim.
// - OTEL_EXPORTER_OTLP_ENDPOINT is a *base* URL shared by all signals (for
//   example http://otel-collector:4318). A URL passed explicitly to the
//   exporter is used as-is, so the "/v1/traces" path must be appended here.
// A base value that already ends in "/v1/traces" is accepted unchanged so
// existing deployments keep working.
const otlpBaseUrl = (process.env.OTEL_EXPORTER_OTLP_ENDPOINT || "http://otel-collector:4318").replace(/\/+$/, "");
const otlpTracesUrl =
  process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ||
  (otlpBaseUrl.endsWith("/v1/traces") ? otlpBaseUrl : `${otlpBaseUrl}/v1/traces`);

// SDK bootstrap: names the service, exports traces over OTLP/HTTP, and enables
// the Node auto-instrumentations with the fs instrumentation switched off.
const sdk = new NodeSDK({
  serviceName: "order-service",
  traceExporter: new OTLPTraceExporter({
    url: otlpTracesUrl,
  }),
  instrumentations: [
    getNodeAutoInstrumentations({
      "@opentelemetry/instrumentation-fs": { enabled: false },
    }),
  ],
});

sdk.start();
import { trace, SpanStatusCode } from "@opentelemetry/api";
const tracer = trace.getTracer("order-service");

/** Span handle type, derived from the tracer so no additional import is needed. */
type ActiveSpan = ReturnType<typeof tracer.startSpan>;

/**
 * Runs `work` inside a new active span called `name`.
 *
 * The span is always ended, including when `work` throws. On failure the span
 * is marked ERROR, the exception is recorded on it, and the original error is
 * rethrown unchanged. On success the status is left untouched so the caller
 * decides whether to set OK.
 *
 * @param name Span name.
 * @param work Operation to trace; receives the span so it can add attributes.
 * @returns Whatever `work` resolves to.
 * @throws Whatever `work` throws.
 */
async function runInSpan<T>(name: string, work: (span: ActiveSpan) => Promise<T>): Promise<T> {
  return tracer.startActiveSpan(name, async (span) => {
    try {
      return await work(span);
    } catch (err) {
      // Thrown values are not guaranteed to be Error instances; normalise
      // before reading `.message`.
      const error = err instanceof Error ? err : new Error(String(err));
      span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
      span.recordException(error);
      throw err;
    } finally {
      span.end();
    }
  });
}

/**
 * Validates and charges an order under a "processOrder" parent span with one
 * child span per step.
 *
 * @param orderId Identifier of the order; recorded as the `order.id` attribute.
 * @throws Rethrows any error from validation or payment after recording it on
 *   the failing child span and on the parent span.
 */
async function processOrder(orderId: string): Promise<void> {
  await runInSpan("processOrder", async (span) => {
    span.setAttribute("order.id", orderId);

    await runInSpan("validateOrder", async () => {
      await validateOrder(orderId);
    });

    await runInSpan("chargePayment", async (paymentSpan) => {
      paymentSpan.setAttribute("payment.method", "stripe");
      await chargePayment(orderId);
    });

    // Reached only when both steps succeeded.
    span.setStatus({ code: SpanStatusCode.OK });
  });
}
import { Registry, Counter, Histogram } from "prom-client";
// Registry that the metrics below are registered with.
const registry = new Registry();

// Histogram bucket upper bounds, in seconds.
const latencyBucketsSeconds = [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5];

// Request count, labelled by method, path and response status code.
const httpRequestsTotal = new Counter({
  registers: [registry],
  name: "http_requests_total",
  help: "Total HTTP requests",
  labelNames: ["method", "path", "status_code"],
});

// Request latency distribution, labelled by method and path.
const httpRequestDuration = new Histogram({
  registers: [registry],
  name: "http_request_duration_seconds",
  help: "HTTP request latency in seconds",
  labelNames: ["method", "path"],
  buckets: latencyBucketsSeconds,
});
// Middleware: records one counter increment and one latency observation per
// completed response.
app.use((req, res, next) => {
  // Start the timer without labels. Routing has not run yet, so req.route is
  // still undefined here and any path label computed now would be the raw URL
  // (for example /orders/123) rather than the route template.
  const end = httpRequestDuration.startTimer();

  res.on("finish", () => {
    // Resolve the path label once, after routing, so the counter and the
    // histogram always agree. Requests that matched no route share a single
    // constant label; using the raw URL here would create a new time series
    // for every distinct path a client sends.
    const path = req.route?.path || "unmatched";

    httpRequestsTotal.inc({ method: req.method, path, status_code: res.statusCode });
    end({ method: req.method, path });
  });

  next();
});
// Expose metrics endpoint
app.get("/metrics", async (req, res, next) => {
  try {
    // Serialise first so a failure cannot leave a half-configured response.
    const body = await registry.metrics();
    res.set("Content-Type", registry.contentType);
    res.send(body);
  } catch (err) {
    // A rejected promise in an async handler is not caught by Express 4;
    // hand the error to the error-handling middleware explicitly.
    next(err);
  }
});
# prometheus/alerts.yml