Building an Observability Layer for Your AI Agent: ProteusTracer

#ai #debugging #typescript

Building an Observability Layer for Your AI Agent: ProteusTracer

What we're building

A production-grade observability layer for the Proteus agent framework that captures every LLM call, tool invocation, and token spend — so you can debug failures in seconds instead of hours.

Prerequisites

Node.js 18+
A Proteus agent project (or a project built on any other agent framework)
TypeScript 5+
Basic familiarity with OpenTelemetry concepts

Step 1: Define the core interfaces

Start with the types that will power your observability. This is the contract every tracer must satisfy.

// types.ts
/**
 * A single timed unit of work inside a trace.
 *
 * Spans form a tree through `parentSpanId`; all spans of one run share the
 * same `traceId`.
 */
export interface Span {
  /** Identifier of this span. */
  id: string;
  /** Identifier of the trace this span belongs to. */
  traceId: string;
  /** Identifier of the parent span; absent when the span has no parent. */
  parentSpanId?: string;
  /** Human-readable name of the operation the span represents. */
  name: string;
  /** Role of the span (see `SpanKind`). */
  kind: SpanKind;
  /** Outcome of the span (see `SpanStatus`). */
  status: SpanStatus;
  /** Start timestamp. ProteusTracer fills this from `Date.now()`. */
  startTime: number;
  /** End timestamp; left unset until the span is ended. */
  endTime?: number;
  /** Flat key/value metadata attached to the span. */
  attributes: Record<string, string | number | boolean>;
  /** Timestamped events recorded while the span was open. */
  events: SpanEvent[];
}

/** A named, timestamped occurrence recorded on a span. */
export interface SpanEvent {
  /** Name of the event. */
  name: string;
  /** When the event was recorded. ProteusTracer fills this from `Date.now()`. */
  timestamp: number;
  /** Optional flat key/value metadata describing the event. */
  attributes?: Record<string, string | number | boolean>;
}

/**
 * Role a span plays in the traced flow.
 *
 * String-valued so the kind stays readable in exported data. In
 * ProteusTracer, root spans are created as `SERVER` and spans created via
 * `startSpan` as `INTERNAL`.
 */
export enum SpanKind {
  INTERNAL = 'INTERNAL',
  CLIENT = 'CLIENT',
  SERVER = 'SERVER',
  PRODUCER = 'PRODUCER',
  CONSUMER = 'CONSUMER',
}

/**
 * Outcome of a span.
 *
 * ProteusTracer creates spans as `UNKNOWN` and assigns the final status when
 * the span is ended (`OK` unless another status is passed).
 */
export enum SpanStatus {
  OK = 'OK',
  ERROR = 'ERROR',
  UNKNOWN = 'UNKNOWN',
}

/** A single numeric measurement with its unit and descriptive labels. */
export interface Metric {
  /** Name of the metric. */
  name: string;
  /** Measured value, expressed in `unit`. */
  value: number;
  /** Unit of `value` as a free-form string. */
  unit: string;
  /** When the measurement was recorded. ProteusTracer fills this from `Date.now()`. */
  timestamp: number;
  /** String key/value labels used to describe the measurement. */
  labels: Record<string, string>;
}

/** Identifies the currently active trace and the span new work attaches to. */
export interface TraceContext {
  /** Identifier of the trace. */
  traceId: string;
  /** Identifier of a span in the trace; ProteusTracer stores the root span id here. */
  spanId: string;
  /** Sampling flag for the trace. */
  isSampled: boolean;
}

This gives you the building blocks. Every LLM call becomes a span. Every tool invocation becomes a child span. Every token count becomes a metric.

Step 2: Build the ProteusTracer

Now wire those interfaces into a working tracer that wraps your agent's execution flow.

// proteus-tracer.ts
import { Span, SpanKind, SpanStatus, Metric, TraceContext } from './types';
import { v4 as uuidv4 } from 'uuid';

/**
 * In-memory tracer that collects spans and metrics for a single service.
 *
 * Typical flow: call `startTrace()` once per agent run, wrap each unit of
 * work in `startSpan()` / `endSpan()`, record measurements with
 * `recordMetric()`, read everything back with `export()`, then `reset()`.
 */
export class ProteusTracer {
  /** All spans recorded since the last reset, keyed by span id. */
  private readonly spans = new Map<string, Span>();
  /** All metrics recorded since the last reset, in recording order. */
  private metrics: Metric[] = [];
  /** Context of the trace started by the latest `startTrace()` call, if any. */
  private activeTrace?: TraceContext;

  /**
   * @param serviceName Name used for the root span (`<serviceName>.run`), the
   *   `proteus.service` attribute, and the `service` label on every metric.
   */
  constructor(private readonly serviceName: string) {}

  /**
   * Starts a new trace with a root span and makes it the active trace.
   *
   * @returns The context of the new trace; `spanId` is the root span's id.
   */
  startTrace(): TraceContext {
    const traceId = uuidv4();
    const rootSpanId = uuidv4();

    const rootSpan: Span = {
      id: rootSpanId,
      traceId,
      name: `${this.serviceName}.run`,
      kind: SpanKind.SERVER,
      status: SpanStatus.UNKNOWN,
      startTime: Date.now(),
      attributes: {
        'proteus.service': this.serviceName,
        'proteus.version': '1.0.0',
      },
      events: [],
    };

    this.spans.set(rootSpanId, rootSpan);
    this.activeTrace = { traceId, spanId: rootSpanId, isSampled: true };

    return this.activeTrace;
  }

  /**
   * Creates and registers a new INTERNAL span.
   *
   * @param name Name of the operation the span represents.
   * @param parentSpanId Parent span id. When omitted (or empty), the span is
   *   attached to the active trace's root span, if a trace is active.
   * @returns The newly created span; it stays open until `endSpan()` is called.
   */
  startSpan(name: string, parentSpanId?: string): Span {
    const spanId = uuidv4();
    // Without an active trace the span gets a fresh trace id of its own.
    const traceId = this.activeTrace?.traceId ?? uuidv4();

    const span: Span = {
      id: spanId,
      traceId,
      // `||` on purpose: an empty-string parent id is treated as "not given".
      parentSpanId: parentSpanId || this.activeTrace?.spanId,
      name,
      kind: SpanKind.INTERNAL,
      status: SpanStatus.UNKNOWN,
      startTime: Date.now(),
      attributes: {},
      events: [],
    };

    this.spans.set(spanId, span);
    return span;
  }

  /**
   * Marks a span as finished, stamping its end time and final status.
   * Unknown span ids are ignored.
   *
   * @param spanId Id of the span to end.
   * @param status Final status; defaults to `SpanStatus.OK`.
   */
  endSpan(spanId: string, status: SpanStatus = SpanStatus.OK): void {
    const span = this.spans.get(spanId);
    if (span) {
      span.endTime = Date.now();
      span.status = status;
    }
  }

  /**
   * Appends a timestamped event to a span. Unknown span ids are ignored.
   *
   * @param spanId Id of the span to attach the event to.
   * @param name Name of the event.
   * @param attributes Optional metadata describing the event.
   */
  addSpanEvent(spanId: string, name: string, attributes?: Record<string, string | number | boolean>): void {
    const span = this.spans.get(spanId);
    if (span) {
      span.events.push({ name, timestamp: Date.now(), attributes });
    }
  }

  /**
   * Records a metric data point stamped with the current time.
   *
   * @param name Name of the metric.
   * @param value Measured value.
   * @param unit Unit of the value.
   * @param labels Extra labels; a `service` label with the service name is
   *   always added and overrides any caller-supplied `service` label.
   */
  recordMetric(name: string, value: number, unit: string, labels: Record<string, string> = {}): void {
    this.metrics.push({
      name,
      value,
      unit,
      timestamp: Date.now(),
      labels: { ...labels, service: this.serviceName },
    });
  }

  /**
   * Returns everything recorded since the last reset.
   *
   * Both arrays are fresh copies, so later recording or `reset()` does not
   * change a previously exported result. The span and metric objects inside
   * them are shared with the tracer, not cloned.
   *
   * @returns The recorded spans (in insertion order) and metrics.
   */
  export(): { spans: Span[]; metrics: Metric[] } {
    return {
      spans: Array.from(this.spans.values()),
      metrics: [...this.metrics],
    };
  }

  /** Discards all recorded spans and metrics and clears the active trace. */
  reset(): void {
    this.spans.clear();
    // Arrays have no `clear()` method; start over with a fresh array.
    this.metrics = [];
    this.activeTrace = undefined;
  }
}

Step 3: Create a bridge handler for your agent

This is where the tracer meets your actual agent execution. The bridge handler wraps your agent's phases and instruments every call.


typescript
// bridge-handler.ts
import { ProteusTracer } from './proteus-tracer';
import { SpanKind, SpanStatus } from './types';

export class AgentBridgeHandler {
  private tracer: ProteusTracer;

  constructor(serviceName: string) {
    this.tracer = new ProteusTracer(serviceName);
  }

  async runAgent<T>(agentName: string, fn: (context: AgentContext) => Promise<T>): Promise<T> {
    const trace = this.tracer.startTrace();
    const context: AgentContext = {
      traceId: trace.traceId,
      agentName,
      instrumentLLMCall: (model, messages, callFn) => 
        this.instrumentLLMCall(model, messages, callFn),
      instrumentToolCall: (toolName, args, callFn) =>
        this.instrumentToolCall(toolName, args, callFn),
    };

    try {
      const result = await fn(context);
      this.tracer.endSpan(trace.spanId, SpanStatus.OK);
      return result;
    } catch (error) {
      this.tracer.addSpanEvent(trace.spanId, 'error', { 
        'error.message': (error as Error).message 
      });
      this.tracer.endSpan(trace.spanId, SpanStatus.ERROR);
      throw error;
    } finally {
      this.exportTrace();
    }
  }

  private async instrumentLLMCall<T>(
    model: string,
    messages: unknown[],
    callFn: () => Promise<T>
  ): Promise<T> {
    const span = this.tracer.startSpan(`llm.${model}`);
    this.tracer.addSpanEvent(span.id, 'llm.request', {
      'llm.model': model,
      'llm.messages.count': messages.length,
    });

    const startTime = Date.now();
    try {
      const result = await callFn();
      const duration = Date.now() - startTime;

      this.tracer.recordMetric('llm.duration', duration, 'ms', { model });
      this.tracer.addSpanEvent(span.id, 'llm.response', {
        'llm.duration.ms': duration,
      });
      this.tracer.endSpan(span.id, SpanStatus.OK);
      return result;
    } catch (error) {
      this.tracer.addSpanEvent(span.id, 'llm.error', {
        'error.message': (error as Error).message,
      });
      this.tracer.recordMetric('llm.errors', 1, 'count', { model });
      this.tracer.endSpan(span.id, SpanStatus.ERROR);
      throw error;
    }
  }

  private async instrumentToolCall<T>(
    toolName: string,
    args: unknown,
    callFn: () => Promise<T>
  ): Promise<T> {
    const span = this.tracer.startSpan(`tool.${toolName}`);
    this.tracer.addSpanEvent(span.id, 'tool.invoke', {
      'tool.name': toolName,
    });

    try {
      const result = await callFn();
      this.tracer.endSpan(span.id, SpanStatus.OK);
      return result;
    } catch (error) {
      this.tracer.addSpanEvent(span.id, 'tool.error', {
        'error.message': (error as Error).

---

**Debugging AI agents shouldn't feel like reading The Matrix.** 
Join other engineers who are building reliable autonomous workflows in our community: [TracePilot Discord](https://discord.gg/KzXRAXFM8)