Name: Cost Guardian
Author: dudedesi12

Buscar habilidades.../

Cost Guardian | Skills Pool

// lib/ai/model-router.ts
/** Coarse difficulty tier used to decide which Gemini model serves a task. */
type TaskComplexity = "simple" | "medium" | "complex";

/** Model identifier to use for each complexity tier. */
const MODEL_MAP: Record<TaskComplexity, string> = {
  // Cheapest: classification, extraction
  simple: "gemini-2.0-flash-lite",
  // Mid-tier: summarization, Q&A
  medium: "gemini-2.0-flash",
  // Most capable: analysis, reasoning
  complex: "gemini-2.5-pro",
};

/**
 * Chooses a model for a free-text task description by keyword matching.
 *
 * Matching is a case-insensitive substring test. A "complex" keyword wins over
 * a "simple" one when both occur; a task matching neither list is "medium".
 *
 * @param task - Free-text task label, e.g. "classify" or "generate report".
 * @returns The chosen model identifier together with the tier that selected it.
 */
export function getModelForTask(task: string): { model: string; complexity: TaskComplexity } {
  const haystack = task.toLowerCase();
  const mentionsAny = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => haystack.includes(keyword));

  let complexity: TaskComplexity = "medium";
  if (mentionsAny(["analyze", "recommend", "compare", "assess eligibility", "generate report"])) {
    complexity = "complex";
  } else if (mentionsAny(["classify", "extract", "format", "validate", "translate"])) {
    complexity = "simple";
  }

  return { model: MODEL_MAP[complexity], complexity };
}

// lib/ai/gemini-client.ts
import { GoogleGenerativeAI } from "@google/generative-ai";
import { getModelForTask } from "./model-router";
import { trackTokenUsage } from "./usage-tracker";

// Shared SDK client for this module. The non-null assertion only satisfies the
// type checker; nothing here verifies that GEMINI_API_KEY is actually set.
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);

/**
 * Runs `prompt` on the model that `getModelForTask` selects for `task`, records
 * the token usage reported by the API, and returns the response text.
 *
 * Usage tracking is best-effort: if `trackTokenUsage` throws, the failure is
 * logged and the response is still returned, because the model call has
 * already been made and billed by that point.
 *
 * @param task - Task label; drives model selection and is stored as the usage `feature`.
 * @param prompt - Prompt text passed to `generateContent`.
 * @param userId - Optional user the usage record is attributed to.
 * @returns The text of the model response.
 */
export async function generateWithTracking(
  task: string,
  prompt: string,
  userId?: string
): Promise<string> {
  const { model: modelName, complexity } = getModelForTask(task);
  const model = genAI.getGenerativeModel({ model: modelName });

  const result = await model.generateContent(prompt);
  const response = result.response;

  // Track usage (only when the response carries usage metadata).
  const usage = response.usageMetadata;
  if (usage) {
    try {
      await trackTokenUsage({
        userId,
        model: modelName,
        complexity,
        promptTokens: usage.promptTokenCount ?? 0,
        completionTokens: usage.candidatesTokenCount ?? 0,
        totalTokens: usage.totalTokenCount ?? 0,
        feature: task,
      });
    } catch (err: unknown) {
      // Never discard a completed generation because bookkeeping failed.
      console.error("Token usage tracking failed:", err);
    }
  }

  return response.text();
}

-- supabase/migrations/xxx_token_usage.sql
-- One row per tracked model call; rows are inserted by trackTokenUsage.
CREATE TABLE token_usage (
  id uuid DEFAULT gen_random_uuid() PRIMARY KEY,
  -- Nullable: usage may be recorded without a user, and ON DELETE SET NULL
  -- keeps the row when the referenced user is deleted.
  user_id uuid REFERENCES auth.users(id) ON DELETE SET NULL,
  model text NOT NULL,
  complexity text NOT NULL,
  prompt_tokens integer NOT NULL DEFAULT 0,
  completion_tokens integer NOT NULL DEFAULT 0,
  total_tokens integer NOT NULL DEFAULT 0,
  estimated_cost_usd numeric(10, 6) NOT NULL DEFAULT 0,
  feature text,
  -- NOT NULL: the daily budget check filters on created_at, so a row with a
  -- NULL timestamp would never be counted against any day's spend.
  created_at timestamptz NOT NULL DEFAULT now()
);

-- Per-user history, newest first.
CREATE INDEX idx_token_usage_user ON token_usage(user_id, created_at DESC);
-- Date-range scans across all users (e.g. summing today's cost).
CREATE INDEX idx_token_usage_date ON token_usage(created_at DESC);

// lib/ai/usage-tracker.ts
import { createClient } from "@/lib/supabase/server";

/** USD price per one million tokens, keyed by model identifier. */
const COST_PER_1M_TOKENS: Record<string, { input: number; output: number }> = {
  "gemini-2.0-flash-lite": { input: 0.075, output: 0.30 },
  "gemini-2.0-flash": { input: 0.10, output: 0.40 },
  "gemini-2.5-pro": { input: 1.25, output: 10.00 },
};

/** Token counts and attribution for a single model call. */
interface UsageRecord {
  userId?: string;
  model: string;
  complexity: string;
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
  feature: string;
}

/**
 * Estimates the USD cost of one model call and inserts a row into `token_usage`.
 *
 * Cost is prompt tokens priced at the model's input rate plus completion
 * tokens priced at its output rate. A model missing from
 * `COST_PER_1M_TOKENS` is recorded with a cost of 0 and a warning is logged,
 * since such rows do not count toward any cost-based budget.
 *
 * A failed insert is logged rather than thrown, so recording usage never turns
 * a successful generation into an error for the caller.
 *
 * @param record - Token counts and attribution for the call.
 */
export async function trackTokenUsage(record: UsageRecord): Promise<void> {
  const pricing = COST_PER_1M_TOKENS[record.model];
  if (!pricing) {
    console.warn(`No pricing configured for model "${record.model}"; recording cost as 0.`);
  }
  const costs = pricing ?? { input: 0, output: 0 };

  const estimatedCost =
    (record.promptTokens / 1_000_000) * costs.input +
    (record.completionTokens / 1_000_000) * costs.output;

  const supabase = await createClient();
  // The Supabase client reports failures through `error` instead of throwing,
  // so it has to be inspected explicitly.
  const { error } = await supabase.from("token_usage").insert({
    user_id: record.userId,
    model: record.model,
    complexity: record.complexity,
    prompt_tokens: record.promptTokens,
    completion_tokens: record.completionTokens,
    total_tokens: record.totalTokens,
    estimated_cost_usd: estimatedCost,
    feature: record.feature,
  });

  if (error) {
    console.error("Failed to record token usage:", error.message);
  }
}

// BAD: Fetching unnecessary data. No column list, no filter, and no range, so
// the request asks for every column of every row of "profiles".
const { data } = await supabase.from("profiles").select("*"); // All columns, all rows

// GOOD: Selective and paginated.
// (Alternative to the snippet above: both declare `data`, so they cannot share
// one scope as written.)
const { data } = await supabase
  .from("profiles")
  .select("id, full_name, email")  // Only needed columns
  .range(0, 19)                    // Paginated: first page of 20 (bounds are inclusive)
  .order("created_at", { ascending: false }); // Newest rows first

-- supabase/migrations/xxx_budget_config.sql
-- Spending limits, one row per service (enforced by the UNIQUE constraint).
CREATE TABLE budget_config (
  id uuid DEFAULT gen_random_uuid() PRIMARY KEY,
  service text NOT NULL UNIQUE,
  -- NULL means no daily limit: checkBudget returns allowed for such a service.
  daily_limit_usd numeric(10, 2),
  -- NOTE(review): not read by the checkBudget code shown alongside this
  -- migration; presumably used for reporting or alerts. Confirm.
  monthly_limit_usd numeric(10, 2),
  -- NOTE(review): presumably the percentage of the limit at which to alert;
  -- no visible code reads this column. Confirm.
  alert_threshold_pct integer DEFAULT 80,
  -- When true, checkBudget refuses requests once usage reaches the daily
  -- limit; when false, the limit is informational only.
  hard_limit boolean DEFAULT false,
  updated_at timestamptz DEFAULT now()
);

-- Seed limits. Only 'gemini' has a daily limit and a hard stop; the other
-- services carry a monthly figure only.
INSERT INTO budget_config (service, daily_limit_usd, monthly_limit_usd, alert_threshold_pct, hard_limit)
VALUES
  ('gemini', 5.00, 100.00, 80, true),
  ('supabase', null, 25.00, 90, false),
  ('vercel', null, 20.00, 90, false);

// lib/costs/budget-check.ts
import { createClient } from "@/lib/supabase/server";

/**
 * Compares today's recorded spend with the daily limit configured for `service`.
 *
 * "Today" is the current UTC calendar day. Spend is the sum of
 * `estimated_cost_usd` over every `token_usage` row created in that day; the
 * table has no service column, so the sum is not filtered by `service`.
 *
 * If the service has no config row, or no (or a zero) daily limit, the call
 * is allowed and all figures are reported as 0.
 *
 * If reading usage fails, the error is logged and the spend gathered so far is
 * used, so the check fails open.
 * NOTE(review): decide whether a hard limit should fail closed instead.
 *
 * @param service - Value of `budget_config.service` to look up, e.g. "gemini".
 * @returns `allowed` is false only when the limit is hard and spend has
 *   reached it. `usage` and `limit` are in USD; `percentUsed` is rounded to
 *   the nearest integer.
 */
export async function checkBudget(service: string): Promise<{
  allowed: boolean;
  usage: number;
  limit: number;
  percentUsed: number;
}> {
  const supabase = await createClient();

  const { data: config } = await supabase
    .from("budget_config")
    .select("*")
    .eq("service", service)
    .single();

  if (!config?.daily_limit_usd) return { allowed: true, usage: 0, limit: 0, percentUsed: 0 };

  // Half-open UTC window [today 00:00, tomorrow 00:00). An inclusive upper
  // bound of 23:59:59 would miss rows written in the final fractional second.
  const today = new Date().toISOString().split("T")[0];
  const windowStart = new Date(`${today}T00:00:00Z`);
  const windowEnd = new Date(windowStart.getTime() + 24 * 60 * 60 * 1000);

  // Read usage in pages: a single unpaginated select is capped by the API's
  // maximum row count, which would silently under-count a busy day.
  // NOTE(review): assumes the project's row cap is at least `pageSize`
  // (Supabase's default is 1000). Confirm if the cap has been lowered.
  const pageSize = 1000;
  let totalUsage = 0;
  for (let offset = 0; ; offset += pageSize) {
    const { data: rows, error } = await supabase
      .from("token_usage")
      .select("estimated_cost_usd")
      .gte("created_at", windowStart.toISOString())
      .lt("created_at", windowEnd.toISOString())
      // Deterministic order so consecutive pages neither overlap nor skip rows.
      .order("created_at", { ascending: true })
      .order("id", { ascending: true })
      .range(offset, offset + pageSize - 1);

    if (error) {
      console.error(`Failed to read token usage for budget check (${service}):`, error.message);
      break;
    }

    const page = rows ?? [];
    for (const row of page) {
      totalUsage += Number(row.estimated_cost_usd);
    }
    // A short page means there is nothing left to read.
    if (page.length < pageSize) break;
  }

  const limit = Number(config.daily_limit_usd);
  const percentUsed = Math.round((totalUsage / limit) * 100);

  return {
    allowed: !config.hard_limit || totalUsage < limit,
    usage: totalUsage,
    limit,
    percentUsed,
  };
}

// Don't call Gemini twice for the same input
import { Redis } from "@upstash/redis";
// automaticDeserialization is disabled so a cached value comes back as the
// exact string that was stored. With the default setting the client JSON-parses
// values on read, so model output that happens to be valid JSON (an array, an
// object, a bare number) would be returned as a parsed value, not a string.
const redis = new Redis({
  url: process.env.UPSTASH_REDIS_REST_URL!,
  token: process.env.UPSTASH_REDIS_REST_TOKEN!,
  automaticDeserialization: false,
});

/**
 * Returns the cached response for (`task`, `prompt`) when one exists; otherwise
 * generates a response via `generateWithTracking` and caches it for one hour.
 *
 * The cache key is built from the task and a hash of the prompt only, so
 * entries are shared between users. `userId` affects only who a cache miss is
 * attributed to in usage tracking.
 *
 * @param prompt - Prompt text; hashed into the cache key.
 * @param task - Task label; part of the cache key and used for model selection.
 * @param userId - Optional user to attribute the generation to on a cache miss.
 * @returns The cached or freshly generated response text.
 */
export async function generateWithCache(
  prompt: string,
  task: string,
  userId?: string
): Promise<string> {
  const cacheKey = `ai:${task}:${hashPrompt(prompt)}`;
  const cached = await redis.get<string>(cacheKey);
  if (cached) return cached;

  const result = await generateWithTracking(task, prompt, userId);
  await redis.set(cacheKey, result, { ex: 3600 }); // Cache 1 hour
  return result;
}

// BAD: One API call per item. Each iteration awaits its own request, so N
// items cost N sequential model calls.
for (const item of items) {
  await generateWithTracking("classify", `Classify: ${item.text}`);
}

// GOOD: Batch into one call.
// Items are sent as a numbered list ("1. ...", "2. ...") in a single prompt.
const batchPrompt = items.map((item, i) => `${i + 1}. ${item.text}`).join("\n");
// `result` is the raw response text; the prompt asks for a JSON array, so the
// caller still has to parse and validate it before use.
const result = await generateWithTracking(
  "classify",
  `Classify each item:\n${batchPrompt}\n\nReturn JSON array of classifications.`
);

## Monthly Cost Review — [Month Year]

### Service Costs
| Service | Budget | Actual | % Used | Action |
|---------|--------|--------|--------|--------|
| Gemini API | $100 | $ | % | |
| Supabase | $25 | $ | % | |
| Vercel | $20 | $ | % | |
| Upstash | $10 | $ | % | |
| Resend | $20 | $ | % | |
| Stripe fees | N/A | $ | N/A | |
| **Total** | **$175** | **$** | | |

### Top Token Consumers
| Feature | Tokens Used | Cost | Model |
|---------|------------|------|-------|
| | | | |

### Action Items
- [ ] Review and clean up unused Supabase storage
- [ ] Check for expensive queries (pg_stat_statements)
- [ ] Verify model routing is using cheapest appropriate model
- [ ] Delete old token_usage records (> 90 days)

Service	Free Tier	Typical Monthly (Small App)	Cost Driver
Gemini 2.0 Flash	15 RPM free	$0.10-5 per 1M tokens	Token volume
Gemini 2.5 Pro	2 RPM free	$1.25-10 per 1M tokens	Token volume
Gemini 2.0 Flash-Lite	30 RPM free	$0.02-1 per 1M tokens	Token volume
Supabase	500MB DB, 1GB storage	$25/mo (Pro)	Rows, storage, bandwidth
Vercel	100GB bandwidth	$20/mo (Pro)	Function invocations, bandwidth
Upstash Redis

Service	Free Tier	Typical Monthly (Small App)	Cost Driver
Gemini 2.0 Flash	15 RPM free	$0.10-5 per 1M tokens	Token volume
Gemini 2.5 Pro	2 RPM free	$1.25-10 per 1M tokens	Token volume
Gemini 2.0 Flash-Lite	30 RPM free	$0.02-1 per 1M tokens	Token volume
Supabase	500MB DB, 1GB storage	$25/mo (Pro)	Rows, storage, bandwidth
Vercel	100GB bandwidth	$20/mo (Pro)	Function invocations, bandwidth
Upstash Redis

Cost Guardian

1. Cost Landscape

Cost Guardian

1. Cost Landscape

2. Gemini Model Tier Routing

3. Token Usage Tracking

4. Supabase Cost Optimization

5. Budget Alerts

6. Cost-Aware Architecture Patterns

Cache AI Responses

Batch Operations

7. Monthly Cost Review Template

Rules

Llm Trading Agent Security

Energy Procurement

Council

Carrier Relationship Management

Market Research

Market Research