Set up comprehensive observability for Groq integrations with metrics, traces, and alerts. Use when implementing monitoring for Groq operations, setting up dashboards, or configuring alerting for Groq integration health. Trigger with phrases like "groq monitoring", "groq metrics", "groq observability", "monitor groq", "groq alerts", "groq tracing".
Monitor Groq LPU inference API for latency, token throughput, and cost. Groq's defining characteristic is extreme speed -- responses arrive in 50-200ms for small completions, with token generation rates of 500-800 tokens/second.
import Groq from 'groq-sdk';
/**
 * Runs a Groq chat completion and emits latency, throughput, token and cost
 * metrics for it.
 *
 * Latency is the wall-clock time around the SDK call, so the derived
 * tokens-per-second figure is based on that same end-to-end duration.
 * If the SDK call rejects, the rejection propagates and no metrics are emitted.
 *
 * @param groq - Groq SDK client used to issue the request.
 * @param model - Model id; also attached as the `model` label on every metric.
 * @param messages - Chat messages, forwarded unchanged to the API.
 * @returns The completion response exactly as returned by the SDK.
 */
async function trackedCompletion(groq: Groq, model: string, messages: any[]) {
  const start = performance.now();
  const res = await groq.chat.completions.create({ model, messages });
  const durationMs = performance.now() - start;

  // `usage` may be absent on the response; treat missing counts as zero.
  const promptTokens = res.usage?.prompt_tokens ?? 0;
  const completionTokens = res.usage?.completion_tokens ?? 0;
  const totalTokens = res.usage?.total_tokens ?? 0;

  // Convert ms to seconds for the rate. Guard against a zero elapsed time
  // (coarse timers) so the gauge never receives Infinity or NaN.
  const tokensPerSecond = durationMs > 0 ? completionTokens / (durationMs / 1000) : 0;

  emitHistogram('groq_latency_ms', durationMs, { model });
  emitGauge('groq_tokens_per_second', tokensPerSecond, { model });
  emitCounter('groq_tokens_total', totalTokens, { model, direction: 'total' });
  emitCounter('groq_tokens_total', promptTokens, { model, direction: 'input' });
  emitCounter('groq_tokens_total', completionTokens, { model, direction: 'output' });

  // Groq pricing is very low -- track for volume visibility.
  // Values are USD per 1M tokens; one rate is applied to the request's total
  // token count, and unlisted models fall back to a default rate.
  const pricing: Record<string, number> = {
    'llama-3.3-70b-versatile': 0.59,
    'llama-3.1-8b-instant': 0.05,
    'mixtral-8x7b-32768': 0.24,
  };
  const costPer1M = pricing[model] ?? 0.10;
  emitCounter('groq_cost_usd', (totalTokens / 1e6) * costPer1M, { model });

  return res;
}
/**
 * Converts a rate-limit reset header value to whole milliseconds.
 *
 * Accepts duration strings built from `h`, `m`, `s` and `ms` components
 * (for example `2m59.56s` or `7.66s`). A plain number is passed through
 * unchanged (integer part only), matching how this value was handled before
 * duration parsing was added.
 *
 * @param raw - Header value, or `null` when the header is absent.
 * @returns Milliseconds, or `undefined` when the value is absent or not recognised.
 */
function parseResetDurationMs(raw: string | null): number | undefined {
  if (raw === null) return undefined;
  const text = raw.trim();

  if (/^\d+(?:\.\d+)?$/.test(text)) return Number.parseInt(text, 10);

  // Require the whole string to be a sequence of <number><unit> pairs so that
  // partially valid input is rejected rather than silently truncated.
  if (!/^(?:\d+(?:\.\d+)?(?:ms|h|m|s))+$/.test(text)) return undefined;

  const unitMs: Record<string, number> = { h: 3600000, m: 60000, s: 1000, ms: 1 };
  let totalMs = 0;
  // `ms` is listed before `m` so that "120ms" is not read as 120 minutes.
  for (const part of text.matchAll(/(\d+(?:\.\d+)?)(ms|h|m|s)/g)) {
    const amount = Number.parseFloat(part[1] ?? '');
    const factor = unitMs[part[2] ?? ''] ?? 0;
    totalMs += amount * factor;
  }
  // Round away floating-point noise from fractional seconds.
  return Math.round(totalMs);
}

/**
 * Publishes Groq's rate-limit response headers as gauges.
 *
 * Reads `x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` and
 * `x-ratelimit-reset-requests`. A header that is absent or unparseable is
 * skipped instead of being reported as `0`, because a zero "remaining" value
 * is indistinguishable from an exhausted quota.
 *
 * @param headers - Response headers from a Groq API call.
 */
function parseRateLimitHeaders(headers: Headers) {
  const emitRemaining = (metric: string, headerName: string): void => {
    const raw = headers.get(headerName);
    if (raw === null) return;
    const value = Number.parseInt(raw, 10);
    if (Number.isFinite(value)) emitGauge(metric, value);
  };

  emitRemaining('groq_rate_limit_remaining_requests', 'x-ratelimit-remaining-requests');
  emitRemaining('groq_rate_limit_remaining_tokens', 'x-ratelimit-remaining-tokens');

  // The reset header is a duration string (e.g. "2m59.56s"), not a count of
  // milliseconds, so it has to be converted before being published as `_ms`.
  const resetMs = parseResetDurationMs(headers.get('x-ratelimit-reset-requests'));
  if (resetMs !== undefined) emitGauge('groq_rate_limit_reset_ms', resetMs);
}