Structured event logging for agent team orchestration with non-blocking writes to .claude/telemetry-[timestamp].log
Version: 1.0.0 Category: Infrastructure Purpose: Non-blocking structured event logging for agent team observability
Used By: Team Orchestration SKILL, team coordinators, specialist agents
The Telemetry Skill provides comprehensive observability into agent team orchestration through structured event logging. It captures agent lifecycle, coordination decisions, progress metrics, test execution, and resource utilization in machine-parseable log files. Telemetry is disabled by default (opt-in) and designed to be completely non-blocking: failures NEVER halt execution.
Fourteen Event Types (REQ-F-23 to REQ-F-27):
Non-Blocking Design (REQ-NF-6):
Real-Time Logging (REQ-NF-3):
Opt-In Configuration (REQ-F-22):
Real-Time Delivery Channels:
- File (.claude/telemetry-{timestamp}.log)
- Console (TEAMSTERS_TELEMETRY_CONSOLE=1)
- Webhook (TEAMSTERS_TELEMETRY_WEBHOOK=url)

Function Signature:
def log_telemetry(event_type: str, agent_id: str, status: str, metadata: dict,
project_root: str, team_name: str = None) -> dict
Parameters:
- event_type (string): Event type, one of: lifecycle, coordination, progress, test, resource, config, dependency, agent_io, approval, timing, error, cost, data_flow, environment
- agent_id (string): Unique agent identifier (e.g., write-agent-1, testing-coordinator)
- status (string): Event-specific status (e.g., spawned, completed, plan_approved)
- metadata (dict): Event-specific metadata (see event-types.md for schemas)
- project_root (string): Absolute path to project root
- team_name (string, optional): Team name for log file initialization

Returns:
{
"success": boolean, # Always True (even if write failed)
"log_file": string | None, # Path to log file (if write succeeded)
"warning": string | None # Warning message (if write failed)
}
Guarantees:
- Always returns success: True to prevent halting execution

# Coordinator spawns write-agent
agent_id = "write-agent-1"
parent_id = "testing-coordinator"
depth = 2
# Log telemetry
log_telemetry(
event_type="lifecycle",
agent_id=agent_id,
status="spawned",
metadata={
"parent": parent_id,
"depth": depth,
"agent_type": "agents/write-agent.md",
"team_id": "testing-parallel"
},
project_root=project_root,
team_name="testing-parallel"
)
# Continue execution (telemetry failure doesn't halt)
Logged Event:
2026-02-13T14:30:16.000Z | lifecycle | write-agent-1 | spawned | {"parent":"testing-coordinator","depth":2,"agent_type":"agents/write-agent.md","team_id":"testing-parallel"}
# Coordinator proposes execution plan
plan_summary = "3 parallel write-agents for 12 test targets"
batches = 3
total_targets = 12
# Log telemetry
log_telemetry(
event_type="coordination",
agent_id="testing-coordinator",
status="plan_proposed",
metadata={
"plan_summary": plan_summary,
"batches": batches,
"total_targets": total_targets,
"approval_required": True
},
project_root=project_root,
team_name="testing-parallel"
)
# Present plan to user (telemetry logged before approval gate)
Logged Event:
2026-02-13T14:30:01.456Z | coordination | testing-coordinator | plan_proposed | {"plan_summary":"3 parallel write-agents for 12 test targets","batches":3,"total_targets":12,"approval_required":true}
# Write-agent reports intermediate progress
tests_written = 2
current_file = "test_user_service.py"
percent_complete = 40
# Log telemetry
log_telemetry(
event_type="progress",
agent_id="write-agent-1",
status="update",
metadata={
"tests_written": tests_written,
"current_file": current_file,
"percent_complete": percent_complete
},
project_root=project_root
)
# Continue writing tests (telemetry is background activity)
Logged Event:
2026-02-13T14:30:20.000Z | progress | write-agent-1 | update | {"tests_written":2,"current_file":"test_user_service.py","percent_complete":40}
# Execute-agent finishes running tests
passed = 12
failed = 3
skipped = 0
duration_seconds = 8.5
# Log telemetry
log_telemetry(
event_type="test",
agent_id="execute-agent",
status="execution_complete",
metadata={
"passed": passed,
"failed": failed,
"skipped": skipped,
"duration_seconds": duration_seconds,
"pass_rate": passed / (passed + failed)
},
project_root=project_root
)
# Continue with validation phase
Logged Event:
2026-02-13T14:30:48.500Z | test | execute-agent | execution_complete | {"passed":12,"failed":3,"skipped":0,"duration_seconds":8.5,"pass_rate":0.8}
# Resource manager updates queue status
active_agents = 5
queued_agents = 2
max_agents = 5
# Log telemetry
log_telemetry(
event_type="resource",
agent_id="testing-coordinator",
status="queue_status",
metadata={
"active_agents": active_agents,
"queued_agents": queued_agents,
"max_agents": max_agents
},
project_root=project_root
)
# Continue spawning agents from queue
Logged Event:
2026-02-13T14:31:05.789Z | resource | testing-coordinator | queue_status | {"active_agents":5,"queued_agents":2,"max_agents":5}
# Agent completes and reports token usage
agent_id = "write-agent-1"
input_tokens = 1500
output_tokens = 800
cache_read_tokens = 500
cache_write_tokens = 200
# Log telemetry
log_telemetry(
event_type="resource",
agent_id=agent_id,
status="token_usage",
metadata={
"agent_id": agent_id,
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"total_tokens": input_tokens + output_tokens,
"cache_read_tokens": cache_read_tokens,
"cache_write_tokens": cache_write_tokens,
"model": "sonnet"
},
project_root=project_root
)
# Continue with next agent
Logged Event:
2026-02-13T14:30:30.000Z | resource | write-agent-1 | token_usage | {"agent_id":"write-agent-1","input_tokens":1500,"output_tokens":800,"total_tokens":2300,"cache_read_tokens":500,"cache_write_tokens":200,"model":"sonnet"}
log_telemetry(
event_type="config",
agent_id="testing-coordinator",
status="config_loaded",
metadata={
"team_name": "testing-parallel",
"file_path": "teams/testing-parallel.md",
"frontmatter_raw": {
"name": "testing-parallel",
"coordinator": "teams/testing-parallel-coordinator.md",
"max_agents": 5,
"timeout_minutes": 30,
"approval_gates": {"before_execution": True, "after_completion": False}
},
"file_size_bytes": 2048,
"parse_duration_ms": 12
},
project_root=project_root,
team_name="testing-parallel"
)
log_telemetry(
event_type="dependency",
agent_id="testing-coordinator",
status="graph_constructed",
metadata={
"adjacency_list": {
"analyze-agent": [],
"write-agent-1": ["analyze-agent"],
"write-agent-2": ["analyze-agent"],
"execute-agent": ["write-agent-1", "write-agent-2"]
},
"topological_order": ["analyze-agent", "write-agent-1", "write-agent-2", "execute-agent"],
"total_nodes": 4,
"total_edges": 4,
"has_cycles": False
},
project_root=project_root,
team_name="testing-parallel"
)
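The topological_order and has_cycles fields above can be derived from the adjacency list with Kahn's algorithm. A minimal sketch (not the skill's actual implementation), where each node maps to the list of its prerequisite nodes, as in the event metadata:

```python
from collections import deque

def topo_sort(adjacency):
    """Kahn's algorithm over an adjacency list of node -> prerequisites.
    Returns (order, has_cycles)."""
    # in_degree[n] = number of unfinished prerequisites of n
    in_degree = {node: len(deps) for node, deps in adjacency.items()}
    # dependents[d] = nodes that wait on d
    dependents = {node: [] for node in adjacency}
    for node, deps in adjacency.items():
        for dep in deps:
            dependents[dep].append(node)
    ready = deque(sorted(n for n, d in in_degree.items() if d == 0))
    order = []
    while ready:
        node = ready.popleft()
        order.append(node)
        for dependent in dependents[node]:
            in_degree[dependent] -= 1
            if in_degree[dependent] == 0:
                ready.append(dependent)
    # If some nodes never reached in-degree 0, the graph has a cycle
    has_cycles = len(order) != len(adjacency)
    return order, has_cycles

graph = {
    "analyze-agent": [],
    "write-agent-1": ["analyze-agent"],
    "write-agent-2": ["analyze-agent"],
    "execute-agent": ["write-agent-1", "write-agent-2"],
}
order, has_cycles = topo_sort(graph)
# order: ["analyze-agent", "write-agent-1", "write-agent-2", "execute-agent"]
```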
log_telemetry(
event_type="agent_io",
agent_id="write-agent-1",
status="prompt_constructed",
metadata={
"agent_id": "write-agent-1",
"prompt_text": "You are write-agent-1 in the testing-parallel team...",
"prompt_length_chars": 4500,
"prompt_estimated_tokens": 1200,
"model": "sonnet",
"includes_context": True
},
project_root=project_root
)
log_telemetry(
event_type="approval",
agent_id="testing-coordinator",
status="user_response_received",
metadata={
"gate_type": "before_execution",
"decision": "modify",
"feedback_text": "Reduce to 2 parallel agents instead of 3",
"response_time_seconds": 8.5,
"iteration": 1
},
project_root=project_root,
team_name="testing-parallel"
)
log_telemetry(
event_type="timing",
agent_id="testing-coordinator",
status="phase_transition",
metadata={
"from_phase": 1,
"to_phase": 2,
"transition_duration_seconds": 0.45,
"idle_time_seconds": 0.12,
"overhead_description": "Dependency resolution and agent prompt construction"
},
project_root=project_root,
team_name="testing-parallel"
)
log_telemetry(
event_type="error",
agent_id="write-agent-2",
status="error_classified",
metadata={
"agent_id": "write-agent-2",
"error_category": "timeout",
"raw_error": "Agent exceeded 120s timeout while generating tests for src/complex_service.py",
"classified_severity": "retriable",
"suggested_action": "Retry with increased timeout or simpler prompt",
"stack_trace": None
},
project_root=project_root
)
log_telemetry(
event_type="cost",
agent_id="testing-coordinator",
status="cumulative_cost",
metadata={
"team_id": "testing-parallel",
"total_input_tokens": 15000,
"total_output_tokens": 8500,
"total_cache_read_tokens": 3000,
"total_cache_write_tokens": 1200,
"estimated_cost_usd": 0.47,
"cost_by_model": {"sonnet": 0.35, "opus": 0.12},
"cost_by_agent": {"analyzer": 0.08, "writer-1": 0.15, "writer-2": 0.12, "executor": 0.12},
"agents_completed": 4,
"agents_remaining": 0
},
project_root=project_root,
team_name="testing-parallel"
)
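The estimated_cost_usd and per-model breakdowns above can be computed from the token counters. A sketch under assumed per-million-token rates; the numbers below are illustrative placeholders, not authoritative pricing:

```python
# Illustrative per-million-token rates (assumptions, not actual pricing)
RATES_PER_MTOK = {
    "sonnet": {"input": 3.00, "output": 15.00,
               "cache_read": 0.30, "cache_write": 3.75},
}

def estimate_cost_usd(usage, model="sonnet"):
    """usage: dict of token counts keyed like the telemetry metadata fields."""
    r = RATES_PER_MTOK[model]
    return round(
        usage.get("input_tokens", 0) / 1e6 * r["input"]
        + usage.get("output_tokens", 0) / 1e6 * r["output"]
        + usage.get("cache_read_tokens", 0) / 1e6 * r["cache_read"]
        + usage.get("cache_write_tokens", 0) / 1e6 * r["cache_write"],
        6,
    )

cost = estimate_cost_usd({"input_tokens": 1500, "output_tokens": 800,
                          "cache_read_tokens": 500, "cache_write_tokens": 200})
# → 0.0174 under the assumed rates
```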
log_telemetry(
event_type="test",
agent_id="execute-agent",
status="test_case_result",
metadata={
"test_name": "test_user_creation_with_valid_email",
"test_file": "tests/test_user_service.py",
"status": "passed",
"duration_seconds": 0.23,
"failure_message": None,
"failure_traceback": None,
"stdout_capture": "",
"stderr_capture": "",
"assertions_checked": 3
},
project_root=project_root
)
log_telemetry(
event_type="environment",
agent_id="testing-coordinator",
status="session_start",
metadata={
"session_id": "sess-20260325T143000-a7f2",
"plugin_version": "1.0.0",
"claude_code_version": None,
"platform": "win32",
"os_version": "Windows 11 Enterprise 10.0.26100",
"shell": "bash",
"working_directory": "D:/dev/my-project",
"git_branch": "main",
"git_commit": "abc1234",
"git_dirty": False,
"node_version": "v20.11.0",
"python_version": "3.12.1"
},
project_root=project_root,
team_name="testing-parallel"
)
Telemetry is disabled by default. Users must opt-in using one of these methods:
Method 1: Environment Variable
# Enable telemetry
export TEAMSTERS_TELEMETRY=1
# Run command with telemetry
/team-run testing-parallel src/
Method 2: Configuration File
// .claude/teamsters-config.json
{
"telemetry_enabled": true
}
Method 3: Team Definition Override
# teams/testing-parallel.md
---
telemetry_enabled: true
# ... other team fields
---
Configuration Precedence (highest to lowest):
1. Team definition (telemetry_enabled: true)
2. Environment variable (TEAMSTERS_TELEMETRY=1)
3. Configuration file (.claude/teamsters-config.json)

Log File Path: {project_root}/.claude/telemetry-{timestamp}.log
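The precedence rules can be sketched as a resolver. This is a simplified illustration (the team frontmatter is passed in pre-parsed, and only the exact value "1" enables the environment variable, both assumptions):

```python
import json
import os

def is_telemetry_enabled(team_frontmatter=None, env=os.environ,
                         config_path=".claude/teamsters-config.json"):
    """Resolve the telemetry opt-in using the precedence above:
    team definition > environment variable > config file > default off."""
    # 1. Team definition override (highest precedence)
    if team_frontmatter and "telemetry_enabled" in team_frontmatter:
        return bool(team_frontmatter["telemetry_enabled"])
    # 2. Environment variable
    if env.get("TEAMSTERS_TELEMETRY") == "1":
        return True
    # 3. Configuration file (missing or malformed file falls through)
    try:
        with open(config_path) as f:
            return bool(json.load(f).get("telemetry_enabled", False))
    except (OSError, json.JSONDecodeError):
        pass
    # 4. Default: disabled
    return False
```

For example, a team definition with telemetry_enabled: true wins even when the environment variable and config file are absent.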
Timestamp Format: YYYY-MM-DDTHH-MM-SS (ISO 8601 with dashes for Windows compatibility)
Example:
D:/dev/teamsters-plugin/.claude/telemetry-2026-02-13T14-30-00.log
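Building that path is a small wrapper around strftime (colons are invalid in Windows filenames, hence the dashes). A sketch; telemetry_log_path is an illustrative name:

```python
import os
from datetime import datetime, timezone

def telemetry_log_path(project_root, now=None):
    """Build .claude/telemetry-{timestamp}.log with a filesystem-safe
    timestamp (dashes instead of colons in the time portion)."""
    now = now or datetime.now(timezone.utc)
    stamp = now.strftime("%Y-%m-%dT%H-%M-%S")
    return os.path.join(project_root, ".claude", f"telemetry-{stamp}.log")

path = telemetry_log_path(
    "D:/dev/teamsters-plugin",
    datetime(2026, 2, 13, 14, 30, 0, tzinfo=timezone.utc))
# path ends with "telemetry-2026-02-13T14-30-00.log"
```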
Session-Based Logging:
The telemetry system supports 3 simultaneous delivery channels. All channels are non-blocking - failures in any channel never halt execution.
| Channel | Output | Enable With | Default |
|---|---|---|---|
| File | .claude/telemetry-{timestamp}.log | --telemetry | Active when telemetry enabled |
| Console | Live terminal output | --telemetry-console or TEAMSTERS_TELEMETRY_CONSOLE=1 | Disabled |
| Webhook | HTTP POST to endpoint | TEAMSTERS_TELEMETRY_WEBHOOK=url | Disabled |
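Fanning one formatted event out to all enabled channels without letting any failure halt execution could look like this sketch. dispatch_event and the sink callables are hypothetical names, not the skill's actual API:

```python
def dispatch_event(event_line, channels):
    """Fan a formatted event out to every enabled channel. A failure in
    any channel is collected as a warning but NEVER raised."""
    warnings = []
    for name, sink in channels.items():
        try:
            sink(event_line)
        except Exception as exc:  # non-blocking: swallow channel errors
            warnings.append(f"{name} channel failed (non-critical): {exc}")
    return {"success": True, "warning": "; ".join(warnings) or None}

def _failing_webhook(event_line):
    raise OSError("connection refused")  # simulate an unreachable endpoint

received = []
result = dispatch_event(
    "2026-02-13T14:30:16.000Z | lifecycle | write-agent-1 | spawned | {}",
    {"file": received.append, "webhook": _failing_webhook},
)
# result["success"] is True and received holds the event even though
# the webhook sink raised
```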
Compact, color-coded event stream displayed in the terminal during execution.
Enable:
# CLI flag
/team-run my-team --telemetry-console
# Environment variable
export TEAMSTERS_TELEMETRY_CONSOLE=1
# Team definition
telemetry_console: true
Format: [HH:MM:SS] EVENT_TYPE agent_id status summary
Example:
[14:30:00] ENV coordinator session platform=win32 branch=main
[14:30:01] CONFIG coordinator loaded my-team (teams/my-team.md)
[14:30:01] DEPEND coordinator graph 2 nodes, 1 edge, no cycles
[14:30:10] APPROVAL coordinator approved before_execution (8.6s)
[14:30:10] LIFECYCLE analyzer spawned Explore, depth=2
[14:30:25] LIFECYCLE analyzer completed 14.9s
[14:30:25] COST coordinator running $0.008 (1/2 agents)
[14:30:45] LIFECYCLE writer completed 19.5s
[14:30:45] TIMING coordinator overhead 44.7s wall, 34.4s work, 23% tax
[14:30:45] ENV coordinator session completed, $0.019 total
Color coding: lifecycle=green/red/yellow, timing=cyan, error=red, cost=magenta, test=blue, environment=gray.
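Rendering a pipe-delimited log line in this console format can be sketched as follows (the summary column and color codes are omitted; console_line is an illustrative helper name):

```python
def console_line(event_line):
    """Render one pipe-delimited telemetry event in the compact
    [HH:MM:SS] EVENT_TYPE agent_id status form shown above."""
    timestamp, event_type, agent_id, status, _metadata = (
        part.strip() for part in event_line.split(" | ", 4))
    hh_mm_ss = timestamp[11:19]  # slice HH:MM:SS out of the ISO timestamp
    return f"[{hh_mm_ss}] {event_type.upper()} {agent_id} {status}"

line = console_line(
    "2026-02-13T14:30:16.000Z | lifecycle | write-agent-1 | spawned | {}")
# → "[14:30:16] LIFECYCLE write-agent-1 spawned"
```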
Stream events as JSON to any HTTP endpoint.
Enable:
# Basic
export TEAMSTERS_TELEMETRY_WEBHOOK=http://localhost:8080/events
# With authentication
export TEAMSTERS_TELEMETRY_WEBHOOK_TOKEN=my-api-key
# Batch mode (buffer 10 events per POST)
export TEAMSTERS_TELEMETRY_WEBHOOK_BATCH=10
JSON payload (single event):
{
"timestamp": "2026-03-25T14:30:25.100Z",
"event_type": "lifecycle",
"agent_id": "analyzer",
"status": "completed",
"team_name": "my-team",
"session_id": "sess-my-team-20260325",
"metadata": { "duration_seconds": 14.9, "model_used": "sonnet" }
}
Custom headers:
Content-Type: application/json
X-Teamsters-Event: lifecycle
X-Teamsters-Status: completed
X-Teamsters-Team: my-team
X-Teamsters-Session: sess-my-team-20260325
Authorization: Bearer {token}
Compatible with: Any JSON POST endpoint, Grafana Loki, Datadog, Elasticsearch, custom receivers.
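Posting a single event with these headers can be sketched with the standard library. post_event is an illustrative name; the 5-second timeout and catch-all error handling are assumptions in keeping with the non-blocking guarantee:

```python
import json
import os
import urllib.request

def post_event(event: dict) -> bool:
    """POST one event as JSON to the configured webhook with the custom
    headers above. Returns False on any failure; never raises."""
    url = os.environ.get("TEAMSTERS_TELEMETRY_WEBHOOK")
    if not url:
        return False
    headers = {
        "Content-Type": "application/json",
        "X-Teamsters-Event": event.get("event_type", ""),
        "X-Teamsters-Status": event.get("status", ""),
        "X-Teamsters-Team": event.get("team_name", ""),
        "X-Teamsters-Session": event.get("session_id", ""),
    }
    token = os.environ.get("TEAMSTERS_TELEMETRY_WEBHOOK_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        req = urllib.request.Request(url, data=json.dumps(event).encode(),
                                     headers=headers, method="POST")
        with urllib.request.urlopen(req, timeout=5):
            return True
    except Exception:
        return False  # non-blocking: webhook failure never halts execution
```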
Watch the telemetry log file in a second terminal while execution runs:
# Unix/macOS
tail -f .claude/telemetry-*.log
# Windows PowerShell
Get-Content .claude\telemetry-*.log -Wait -Tail 0
# Pretty-print JSON metadata with jq
tail -f .claude/telemetry-*.log | while read line; do
echo "$line" | sed 's/.*| //' | jq '.' 2>/dev/null || echo "$line"
done
# Filter specific event types
tail -f .claude/telemetry-*.log | grep "lifecycle\|error\|cost"
# Filter by agent
tail -f .claude/telemetry-*.log | grep "write-agent-1"
For maximum observability, enable all 3 channels:
export TEAMSTERS_TELEMETRY_CONSOLE=1
export TEAMSTERS_TELEMETRY_WEBHOOK=http://localhost:8080/events
/team-run my-team --telemetry
This gives you:
This skill is composed of two sub-skills:
event-types.md (REQ-F-23 to REQ-F-27, REQ-F-27a):
log-writer.md (REQ-F-21, REQ-F-22, REQ-NF-3, REQ-NF-6):
Format:
[timestamp] | [event_type] | [agent_id] | [status] | [metadata_json]
Implementation:
import json
from datetime import datetime, timezone

def log_telemetry(event_type: str, agent_id: str, status: str, metadata: dict,
                  project_root: str, team_name: str = None) -> dict:
    """
    Log telemetry event with structured format.

    This is the main entry point for all telemetry logging.
    """
    try:
        # Check if telemetry is enabled
        if not is_telemetry_enabled():
            # Silent no-op if disabled
            return {'success': True, 'log_file': None, 'warning': None}

        # Generate timestamp (ISO 8601 UTC with milliseconds and Z suffix)
        timestamp = datetime.now(timezone.utc).isoformat(
            timespec='milliseconds').replace('+00:00', 'Z')

        # Serialize metadata to JSON
        metadata_json = json.dumps(metadata, separators=(',', ':'))

        # Format event (pipe-delimited)
        event = f"{timestamp} | {event_type} | {agent_id} | {status} | {metadata_json}"

        # Write event to log file (non-blocking)
        return write_telemetry_event(event, project_root, team_name)
    except Exception as e:
        # CRITICAL: Never raise exception from telemetry
        warning_msg = f"Telemetry logging failed (non-critical): {e}"
        log_warning(warning_msg)
        return {'success': True, 'log_file': None, 'warning': warning_msg}
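The inverse operation, parsing log lines back into structured records for post-run analysis, follows directly from the pipe-delimited format. parse_event is an illustrative helper, not part of the skill's API:

```python
import json

def parse_event(line: str) -> dict:
    """Parse one 'timestamp | event_type | agent_id | status | metadata_json'
    log line back into a dict (maxsplit=4 keeps metadata intact)."""
    timestamp, event_type, agent_id, status, metadata_json = (
        part.strip() for part in line.split(" | ", 4))
    return {
        "timestamp": timestamp,
        "event_type": event_type,
        "agent_id": agent_id,
        "status": status,
        "metadata": json.loads(metadata_json),
    }

event = parse_event(
    '2026-02-13T14:30:48.500Z | test | execute-agent | execution_complete '
    '| {"passed":12,"failed":3,"skipped":0}')
# event["metadata"]["passed"] == 12
```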
All telemetry operations are non-blocking:
# Example: Coordinator spawns agent with telemetry