Name: VictoriaMetrics Monitoring Skill
Author: cloudthinker-ai

搜索技能.../

VictoriaMetrics Monitoring Skill | Skills Pool

#!/bin/bash

# Run an instant MetricsQL query via the /api/v1/query endpoint.
# Globals:   VM_BASE_URL (read) - base URL prepended to the endpoint path
# Arguments: $1 - MetricsQL expression
#            $2 - value for the "time" parameter (optional; defaults to the
#                 current Unix timestamp from `date +%s`)
# Outputs:   whatever curl prints for the response, on stdout
# Returns:   1 if VM_BASE_URL is unset or empty, otherwise curl's exit status
vm_query() {
    local metricsql="$1"
    local time="${2:-$(date +%s)}"
    # Without a base URL curl would be handed the bare path and fail silently.
    if [[ -z "${VM_BASE_URL:-}" ]]; then
        echo "vm_query: VM_BASE_URL is not set" >&2
        return 1
    fi
    # -s hides the progress meter; -S still reports transport errors on stderr
    # so an unreachable server does not look like an empty result.
    curl -sS "${VM_BASE_URL}/api/v1/query" \
        --data-urlencode "query=${metricsql}" \
        --data-urlencode "time=${time}"
}

# Run a range MetricsQL query via the /api/v1/query_range endpoint.
# Globals:   VM_BASE_URL (read) - base URL prepended to the endpoint path
# Arguments: $1 - MetricsQL expression
#            $2 - "start" parameter (optional; defaults to now - 3600)
#            $3 - "end" parameter (optional; defaults to now)
#            $4 - "step" parameter (optional; defaults to 60)
# Outputs:   whatever curl prints for the response, on stdout
# Returns:   1 if VM_BASE_URL is unset or empty, otherwise curl's exit status
vm_query_range() {
    local metricsql="$1"
    # Take one timestamp so the default window is exactly 3600 seconds wide
    # even if the clock ticks between computing start and end.
    local now
    now=$(date +%s)
    local start="${2:-$(( now - 3600 ))}"
    local end="${3:-${now}}"
    local step="${4:-60}"
    # Without a base URL curl would be handed the bare path and fail silently.
    if [[ -z "${VM_BASE_URL:-}" ]]; then
        echo "vm_query_range: VM_BASE_URL is not set" >&2
        return 1
    fi
    # -s hides the progress meter; -S still reports transport errors on stderr.
    curl -sS "${VM_BASE_URL}/api/v1/query_range" \
        --data-urlencode "query=${metricsql}" \
        --data-urlencode "start=${start}" \
        --data-urlencode "end=${end}" \
        --data-urlencode "step=${step}"
}

# Fetch an arbitrary endpoint below the configured base URL.
# Globals:   VM_BASE_URL (read) - base URL prepended to the endpoint path
# Arguments: $1 - endpoint path, appended verbatim to VM_BASE_URL
# Outputs:   whatever curl prints for the response, on stdout
# Returns:   1 if VM_BASE_URL is unset or empty, otherwise curl's exit status
vm_api() {
    local endpoint="$1"
    # Without a base URL curl would be handed the bare path and fail silently.
    if [[ -z "${VM_BASE_URL:-}" ]]; then
        echo "vm_api: VM_BASE_URL is not set" >&2
        return 1
    fi
    # -s hides the progress meter; -S still reports transport errors on stderr.
    curl -sS "${VM_BASE_URL}${endpoint}"
}

# Start the three independent requests as background jobs so they run
# concurrently, then block until every one of them has finished.
vm_query "up" &
vm_api "/api/v1/status/tsdb" &
vm_api "/api/v1/status/active_queries" &
wait

#!/bin/bash
# Discovery: list metric names, label names, and a TSDB status summary.

# First 20 values of the __name__ label (the slice is taken inside jq).
printf '%s\n' "=== Available Metric Names (top 20) ==="
vm_api "/api/v1/label/__name__/values" |
    jq -r '.data[:20][]'

# First 20 label names (the cut-off is applied by head).
printf '\n%s\n' "=== Label Names ==="
vm_api "/api/v1/labels" |
    jq -r '.data[]' |
    head -20

# Series totals plus the first 10 per-metric series counts as "name: value".
printf '\n%s\n' "=== TSDB Status ==="
vm_api "/api/v1/status/tsdb" | jq '{
    totalSeries: .data.totalSeries,
    totalLabelValuePairs: .data.totalLabelValuePairs,
    seriesCountByMetricName: [.data.seriesCountByMetricName[:10][] | "\(.name): \(.value)"]
}'

#!/bin/bash
# Storage health overview.
#
# The three lookups run concurrently, but each job writes to its own scratch
# file and the sections are printed only after `wait`. Echoing the headers in
# the foreground while the jobs write straight to stdout would print all
# headers first and the data afterwards in completion order, unlabelled.
echo "=== VictoriaMetrics Storage Health ==="
if health_dir=$(mktemp -d); then
    vm_api "/api/v1/status/tsdb" | jq '{
        totalSeries: .data.totalSeries,
        totalLabelValuePairs: .data.totalLabelValuePairs
    }' > "${health_dir}/tsdb" &

    vm_api "/api/v1/status/active_queries" \
        | jq '.data | length | "Active queries: \(.)"' -r > "${health_dir}/active_queries" &

    # stderr of this request is intentionally discarded.
    vm_api "/flags" 2>/dev/null \
        | grep -E "retentionPeriod|storageDataPath" | head -5 > "${health_dir}/flags" &

    # Barrier: all three scratch files are complete after this point.
    wait

    echo "--- TSDB Stats ---"
    cat "${health_dir}/tsdb"
    echo "--- Active Queries ---"
    cat "${health_dir}/active_queries"
    echo "--- Build Info ---"
    cat "${health_dir}/flags"

    rm -rf -- "${health_dir}"
else
    echo "storage health: mktemp failed, skipping TSDB/active-query/flags sections" >&2
fi

echo ""
echo "=== Top Series by Metric Name ==="
# The TSDB status endpoint returns only the top 10 entries unless topN is
# given, so request 15 explicitly to match the slice below.
vm_api "/api/v1/status/tsdb?topN=15" \
    | jq -r '.data.seriesCountByMetricName[:15][] | "\(.name)\t\(.value) series"'

#!/bin/bash
# Cardinality analysis: top metrics and label/value pairs by series count,
# plus the number of distinct values for the first 10 label names.

# Fetch the TSDB status once and reuse it for both sections. The endpoint
# returns only the top 10 entries unless topN is given, so request 15 to
# match the slices below.
tsdb_status=$(vm_api "/api/v1/status/tsdb?topN=15")

echo "=== High Cardinality Metrics ==="
printf '%s\n' "$tsdb_status" \
    | jq -r '.data.seriesCountByMetricName | sort_by(-.value)[:15][] | "\(.name)\t\(.value) series"'

echo ""
echo "=== High Cardinality Labels ==="
printf '%s\n' "$tsdb_status" \
    | jq -r '.data.seriesCountByLabelValuePair | sort_by(-.value)[:15][] | "\(.name)\t\(.value) series"'

echo ""
echo "=== Label Value Counts ==="
# Read label names line by line so a name is never word-split or glob-expanded.
# The inner request gets /dev/null on stdin so it cannot consume the list.
while IFS= read -r label; do
    count=$(vm_api "/api/v1/label/${label}/values" </dev/null | jq '.data | length')
    echo "$label: $count unique values"
done < <(vm_api "/api/v1/labels" | jq -r '.data[]' | head -10) | sort -t: -k2 -rn

#!/bin/bash
# Per-instance CPU, memory and root-filesystem usage, highest first,
# limited to 15 rows per section.

echo "=== CPU Usage by Instance ==="
# Utilisation is derived from the idle share: averaging the non-idle series
# directly would average over every (cpu, mode) pair and report the mean
# per-mode share instead of the total busy percentage.
vm_query '(1 - avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance)) * 100' \
    | jq -r '.data.result[] | "\(.metric.instance)\t\(.value[1] | tonumber | . * 10 | round / 10)%"' \
    | sort -t$'\t' -k2 -rn | head -15

echo ""
echo "=== Memory Usage ==="
vm_query '(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100' \
    | jq -r '.data.result[] | "\(.metric.instance)\t\(.value[1] | tonumber | round)%"' \
    | sort -t$'\t' -k2 -rn | head -15

echo ""
echo "=== Disk Usage ==="
# Sorted like the two sections above so the fullest filesystems come first.
vm_query '(1 - node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100' \
    | jq -r '.data.result[] | "\(.metric.instance)\t\(.value[1] | tonumber | round)%"' \
    | sort -t$'\t' -k2 -rn | head -15

#!/bin/bash
# Ingestion and storage figures, each taken from a single instant query.

# Per-second insert rate over 5 minutes; a missing "type" label prints as "total".
printf '%s\n' "=== Ingestion Rate ==="
vm_query 'rate(vm_rows_inserted_total[5m])' |
    jq -r '.data.result[] | "\(.metric.type // "total")\t\(.value[1] | tonumber | round) rows/s"'

# Bytes are divided by 1073741824 (2^30) and rounded to two decimals.
printf '\n%s\n' "=== Storage Size ==="
vm_query 'vm_data_size_bytes' |
    jq -r '.data.result[] | "\(.metric.type // "total")\t\(.value[1] | tonumber / 1073741824 | . * 100 | round / 100)GB"'

# Per-second merge rate over 5 minutes, rounded to two decimals.
printf '\n%s\n' "=== Merge Operations ==="
vm_query 'rate(vm_merges_total[5m])' |
    jq -r '.data.result[] | "\(.metric.type)\t\(.value[1] | tonumber | . * 100 | round / 100) merges/s"'

#!/bin/bash
# Cluster component status (vmselect / vmstorage / vminsert).
#
# The three queries run concurrently, each writing to its own scratch file,
# and the sections are printed only after `wait`. Echoing the headers in the
# foreground while the jobs write straight to stdout would print all headers
# first and the rows afterwards in completion order, so a row could not be
# attributed to its component.
echo "=== Cluster Node Status ==="
if cluster_dir=$(mktemp -d); then
    for component in vmselect vmstorage vminsert; do
        vm_query "up{job=~\".*${component}.*\"}" \
            | jq -r '.data.result[] | "\(.metric.instance)\tup:\(.value[1])"' \
            > "${cluster_dir}/${component}" &
    done

    # Barrier: all scratch files are complete after this point.
    wait

    for component in vmselect vmstorage vminsert; do
        echo "--- ${component} ---"
        cat "${cluster_dir}/${component}"
    done

    rm -rf -- "${cluster_dir}"
else
    echo "cluster status: mktemp failed, skipping" >&2
fi

VictoriaMetrics Monitoring Report
══════════════════════════════════
Resources discovered: [count]

Resource       Status    Key Metric    Issues
──────────────────────────────────────────────
[name]         [ok/warn] [value]       [findings]

Summary: [total] resources | [ok] healthy | [warn] warnings | [crit] critical
Action Items: [list of prioritized findings]

Shortcut	Counter	Why
"I'll skip discovery and check known resources"	Always run Phase 1 discovery first	Resource names change, new resources appear — assumed names cause errors
"The user only asked for a quick check"	Follow the full discovery → analysis flow	Quick checks miss critical issues; structured analysis catches silent failures
"Default configuration is probably fine"	Audit configuration explicitly	Defaults often leave logging, security, and optimization features disabled
"Metrics aren't needed for this"	Always check relevant metrics when available	API/CLI responses show current state; metrics reveal trends and intermittent issues
"I don't have access to that"	Try the command and report the actual error	Assumed permission failures prevent useful investigation; actual errors are informative

VictoriaMetrics Monitoring Skill

API Conventions

Authentication

Base URL

Output Rules

Core Helper Function

VictoriaMetrics Monitoring Skill

API Conventions

Authentication

Base URL

Output Rules

Core Helper Function

Parallel Execution

Anti-Hallucination Rules

Phase 1: Discovery

Common Operations

Storage Health & Performance

Cardinality Analysis

MetricsQL Queries

Ingestion & Retention

Cluster Health (vmselect/vminsert/vmstorage)

Output Format

Counter-Rationalizations

Common Pitfalls

Clickhouse Io

Clickhouse Io

Claude Devfleet

Clickhouse Io

Ai First Engineering

Postgres Patterns