DEV Community

Cover image for Production-Ready AI: Observability, Testing, and Cost Control for LLM Applications
Brian Spann
Brian Spann

Posted on

Production-Ready AI: Observability, Testing, and Cost Control for LLM Applications

You've built an LLM-powered feature. It works in development. Users love the demo.

Then it goes to production.

Suddenly, you're facing questions you didn't consider: How much is this costing? Why did that response take 30 seconds? Did we just hit our rate limit? Why can't I reproduce this bug?

This final article covers the patterns that separate prototypes from production systems.

Observability: You Can't Fix What You Can't See

LLM calls are black boxes. You send tokens in, tokens come out. Without proper observability, debugging is guesswork.

OpenTelemetry Tracing

Instrument every LLM call with distributed tracing:

/// <summary>
/// IChatClient middleware that wraps every chat completion in an OpenTelemetry
/// activity, recording request attributes, token usage, function calls, and errors.
/// </summary>
public class TracingChatClientMiddleware : DelegatingChatClient
{
    private static readonly ActivitySource ActivitySource = new("AI.ChatClient");
    private readonly string _serviceName;

    public TracingChatClientMiddleware(IChatClient inner, string serviceName = "ai-service") 
        : base(inner)
    {
        _serviceName = serviceName;
    }

    public override async Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        using var activity = ActivitySource.StartActivity(
            "chat.completion",
            ActivityKind.Client);

        // StartActivity returns null when no listener is sampling this source;
        // skip all tagging overhead and just forward the call.
        if (activity == null)
            return await base.CompleteAsync(chatMessages, options, cancellationToken);

        // Request attributes
        activity.SetTag("ai.service", _serviceName);
        activity.SetTag("ai.model", options?.ModelId ?? "default");
        activity.SetTag("ai.message_count", chatMessages.Count);
        activity.SetTag("ai.has_tools", options?.Tools?.Any() ?? false);

        // Approximate input size in characters. Summing per-message lengths
        // avoids materializing one large concatenated string just to read its
        // Length (the old string.Join also inflated the count with separators).
        var inputLength = chatMessages.Sum(m => m.Text?.Length ?? 0);
        activity.SetTag("ai.input_length", inputLength);

        var stopwatch = Stopwatch.StartNew();

        try
        {
            var result = await base.CompleteAsync(chatMessages, options, cancellationToken);

            stopwatch.Stop();

            // Response attributes
            activity.SetTag("ai.status", "success");
            activity.SetTag("ai.finish_reason", result.FinishReason?.ToString());
            activity.SetTag("ai.duration_ms", stopwatch.ElapsedMilliseconds);

            if (result.Usage != null)
            {
                activity.SetTag("ai.input_tokens", result.Usage.InputTokenCount);
                activity.SetTag("ai.output_tokens", result.Usage.OutputTokenCount);
                activity.SetTag("ai.total_tokens", result.Usage.TotalTokenCount);
            }

            // Track which functions (if any) the model asked us to invoke.
            var functionCalls = result.Message.Contents
                .OfType<FunctionCallContent>()
                .ToList();

            if (functionCalls.Count > 0)
            {
                activity.SetTag("ai.function_calls", 
                    string.Join(",", functionCalls.Select(f => f.Name)));
            }

            return result;
        }
        catch (Exception ex)
        {
            stopwatch.Stop();

            // Record the failure on the span, then rethrow so callers see it.
            activity.SetTag("ai.status", "error");
            activity.SetTag("ai.error_type", ex.GetType().Name);
            activity.SetTag("ai.error_message", ex.Message);
            activity.SetTag("ai.duration_ms", stopwatch.ElapsedMilliseconds);
            activity.SetStatus(ActivityStatusCode.Error, ex.Message);

            throw;
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Registration with OpenTelemetry

// Wire up OpenTelemetry: export traces from our custom AI activity sources
// alongside the standard ASP.NET Core and HttpClient instrumentation via OTLP.
builder.Services.AddOpenTelemetry()
    .ConfigureResource(resource => resource
        .AddService("my-ai-service"))
    .WithTracing(tracing => tracing
        .AddSource("AI.ChatClient")
        .AddSource("AI.FunctionCalling")
        .AddAspNetCoreInstrumentation()
        .AddHttpClientInstrumentation()
        .AddOtlpExporter());

// Register the traced chat client
builder.Services.AddChatClient(sp =>
{
    // Wrap the provider-specific client with the tracing middleware so every
    // completion is recorded, regardless of which provider is in use.
    var inner = CreateChatClient(sp);

    return inner
        .AsBuilder()
        .Use((client, _) => new TracingChatClientMiddleware(client))
        .Build(sp);
});
Enter fullscreen mode Exit fullscreen mode

Structured Logging

Complement tracing with structured logs:

/// <summary>
/// IChatClient middleware that emits structured log entries around each chat
/// completion, inside a scope carrying a correlation id, model, and message count.
/// </summary>
public class LoggingChatClientMiddleware : DelegatingChatClient
{
    private readonly ILogger<LoggingChatClientMiddleware> _logger;

    public LoggingChatClientMiddleware(IChatClient inner, ILogger<LoggingChatClientMiddleware> logger) 
        : base(inner)
    {
        _logger = logger;
    }

    public override async Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        // Prefer the ambient trace id so log lines correlate with distributed traces.
        var correlationId = Activity.Current?.Id ?? Guid.NewGuid().ToString();

        var scopeState = new Dictionary<string, object>
        {
            ["CorrelationId"] = correlationId,
            ["Model"] = options?.ModelId ?? "default",
            ["MessageCount"] = chatMessages.Count
        };
        using var scope = _logger.BeginScope(scopeState);

        _logger.LogDebug(
            "Starting chat completion with {MessageCount} messages",
            chatMessages.Count);

        var timer = Stopwatch.StartNew();

        try
        {
            var completion = await base.CompleteAsync(chatMessages, options, cancellationToken);

            _logger.LogInformation(
                "Chat completion succeeded in {DurationMs}ms. " +
                "Tokens: {InputTokens} in, {OutputTokens} out",
                timer.ElapsedMilliseconds,
                completion.Usage?.InputTokenCount ?? 0,
                completion.Usage?.OutputTokenCount ?? 0);

            return completion;
        }
        catch (Exception ex)
        {
            // Log the failure with duration, then let the exception propagate.
            _logger.LogError(ex,
                "Chat completion failed after {DurationMs}ms",
                timer.ElapsedMilliseconds);
            throw;
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Testing LLM Applications

Testing non-deterministic systems requires different strategies than traditional unit tests.

Mock Providers for Unit Tests

/// <summary>
/// Scripted IChatClient for unit tests: queue canned responses up front, then
/// assert on the messages the system under test actually sent.
/// </summary>
public class MockChatClient : IChatClient
{
    private readonly Queue<ChatCompletion> _responses = new();
    private readonly List<IList<ChatMessage>> _receivedMessages = new();

    public ChatClientMetadata Metadata => new("mock", new Uri("http://localhost"), "mock-model");

    /// <summary>Queues a plain-text assistant reply.</summary>
    public void EnqueueResponse(string text)
    {
        var message = new ChatMessage(ChatRole.Assistant, text);
        var completion = new ChatCompletion(message);
        _responses.Enqueue(completion);
    }

    /// <summary>
    /// Queues an assistant reply containing a function call; the properties of
    /// <paramref name="arguments"/> (typically an anonymous object) become the
    /// call arguments via reflection.
    /// </summary>
    public void EnqueueFunctionCall(string name, object arguments)
    {
        var content = new FunctionCallContent(
            Guid.NewGuid().ToString(),
            name,
            new Dictionary<string, object?>(
                arguments.GetType()
                    .GetProperties()
                    .ToDictionary(p => p.Name, p => p.GetValue(arguments))));

        var message = new ChatMessage(ChatRole.Assistant, new[] { content });
        _responses.Enqueue(new ChatCompletion(message));
    }

    public Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        // Snapshot the list: callers often keep appending to the same mutable
        // conversation list, which would retroactively change what the
        // assertions below see if we stored the live reference.
        _receivedMessages.Add(chatMessages.ToList());

        if (_responses.Count == 0)
            throw new InvalidOperationException("No responses queued");

        return Task.FromResult(_responses.Dequeue());
    }

    public IAsyncEnumerable<StreamingChatCompletionUpdate> CompleteStreamingAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        // Streaming is not scripted by this mock; tests exercising streaming
        // need a dedicated fake.
        throw new NotImplementedException();
    }

    public void Dispose() { }

    public TService? GetService<TService>(object? key = null) where TService : class => null;

    // Assertions

    /// <summary>Asserts that any message sent through this client contained <paramref name="text"/>.</summary>
    public void AssertMessageContains(string text)
    {
        var allMessages = _receivedMessages.SelectMany(m => m).ToList();
        Assert.Contains(allMessages, m => m.Text?.Contains(text) == true);
    }

    /// <summary>Asserts that a system-role message containing <paramref name="text"/> was sent.</summary>
    public void AssertSystemPromptContains(string text)
    {
        var systemMessages = _receivedMessages
            .SelectMany(m => m)
            .Where(m => m.Role == ChatRole.System);

        Assert.Contains(systemMessages, m => m.Text?.Contains(text) == true);
    }
}
Enter fullscreen mode Exit fullscreen mode

Unit Tests with Mocks

/// <summary>
/// Unit tests for LLM-backed services using <see cref="MockChatClient"/> so no
/// real provider is contacted.
/// </summary>
public class ContentServiceTests
{
    [Fact]
    public async Task SummarizeAsync_ReturnsSummary()
    {
        // Arrange
        var mockClient = new MockChatClient();
        mockClient.EnqueueResponse("This is a brief summary of the content.");

        var service = new ContentService(mockClient);

        // Act
        var result = await service.SummarizeAsync("Long content here...");

        // Assert
        Assert.Equal("This is a brief summary of the content.", result);
        mockClient.AssertSystemPromptContains("summarizer");
    }

    [Fact]
    public async Task ProcessOrder_CallsCorrectFunction()
    {
        // Arrange
        var mockClient = new MockChatClient();
        mockClient.EnqueueFunctionCall("get_order_status", new { orderId = "ORD-123" });
        mockClient.EnqueueResponse("Your order ORD-123 is being shipped.");

        var orderFunctions = new Mock<OrderFunctions>();
        orderFunctions
            .Setup(f => f.GetOrderStatusAsync("ORD-123"))
            .ReturnsAsync(new OrderInfo { Status = "Shipped" });

        var service = new FunctionCallingService(mockClient, orderFunctions.Object);

        // Act
        var result = await service.ProcessAsync("Where is my order ORD-123?");

        // Assert
        // Ordinal, case-insensitive comparison instead of ToLower(): culture-
        // dependent lowercasing can break comparisons (e.g. Turkish 'I').
        Assert.Contains("shipped", result, StringComparison.OrdinalIgnoreCase);
        orderFunctions.Verify(f => f.GetOrderStatusAsync("ORD-123"), Times.Once);
    }
}
Enter fullscreen mode Exit fullscreen mode

Integration Tests with Real Models

For behavior testing, use real models but with controlled inputs:

/// <summary>
/// Behavior tests against a real (cheap) model; run selectively via the
/// "Integration" trait since they cost money and require network access.
/// </summary>
public class IntegrationTests : IClassFixture<AITestFixture>
{
    private readonly IChatClient _chatClient;

    public IntegrationTests(AITestFixture fixture)
    {
        _chatClient = fixture.ChatClient;
    }

    [Fact]
    [Trait("Category", "Integration")]
    public async Task ExtractProductInfo_ReturnsValidStructure()
    {
        // Arrange
        var review = """
            I bought this laptop last month. The battery life is amazing - 
            easily lasts 10 hours. The keyboard is comfortable but the trackpad 
            is a bit small. Overall, great value for $899.
            """;

        var options = new ChatOptions
        {
            ResponseFormat = ChatResponseFormat.ForJsonSchema<ProductReview>()
        };

        // Act
        // CompleteAsync takes a list of messages; wrap the single user message.
        var response = await _chatClient.CompleteAsync(
            new[] { new ChatMessage(ChatRole.User, $"Extract product info:\n\n{review}") },
            options);

        // Case-insensitive binding: models typically emit camelCase JSON keys,
        // while the record properties are PascalCase.
        var result = JsonSerializer.Deserialize<ProductReview>(
            response.Message.Text!,
            new JsonSerializerOptions { PropertyNameCaseInsensitive = true });

        // Assert structural validity rather than exact wording — the model's
        // phrasing varies between runs, but the schema must hold.
        Assert.NotNull(result);
        Assert.InRange(result.Rating, 1, 5);
        Assert.NotEmpty(result.Pros);
        Assert.NotEmpty(result.Cons);
        Assert.True(result.Summary.Length > 10);
    }

    [Theory]
    [InlineData("Hello", false)] // Not a support request
    [InlineData("I want to return my order", true)] // Is a support request
    [InlineData("What's the capital of France?", false)] // General question
    [InlineData("My package never arrived", true)] // Support request
    [Trait("Category", "Integration")]
    public async Task ClassifyIntent_CorrectlyIdentifiesSupportRequests(
        string input, 
        bool expectedIsSupport)
    {
        // Act
        var response = await _chatClient.CompleteAsync(new[]
        {
            new ChatMessage(ChatRole.System, 
                "Respond with only 'SUPPORT' or 'OTHER' based on whether " +
                "this is a customer support request."),
            new ChatMessage(ChatRole.User, input)
        });

        // Tolerate casing drift ("Support") — models don't always obey exactly.
        var isSupport = string.Equals(
            response.Message.Text?.Trim(), "SUPPORT", StringComparison.OrdinalIgnoreCase);

        // Assert
        Assert.Equal(expectedIsSupport, isSupport);
    }
}

/// <summary>
/// Shared xUnit fixture that creates one chat client (on a cheap model) for
/// all integration tests in a class.
/// </summary>
public class AITestFixture : IDisposable
{
    public IChatClient ChatClient { get; }

    public AITestFixture()
    {
        // Fail fast with a clear message instead of the null-forgiving '!',
        // which would surface as an opaque NullReferenceException later.
        var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
            ?? throw new InvalidOperationException(
                "OPENAI_API_KEY environment variable must be set to run integration tests.");

        // Use a test-specific model or configuration
        ChatClient = new OpenAIClient(apiKey)
            .AsChatClient("gpt-4o-mini"); // Cheaper model for tests
    }

    public void Dispose()
    {
        (ChatClient as IDisposable)?.Dispose();
    }
}
Enter fullscreen mode Exit fullscreen mode

Evaluation Testing

For subjective quality, use LLM-as-judge:

/// <summary>
/// Quality-gate tests that use a second LLM as a judge to score generated
/// support responses on empathy, helpfulness, and professionalism.
/// </summary>
public class EvaluationTests
{
    // NOTE(review): these fields are never assigned in this snippet — wire
    // them up via a fixture or constructor before running these tests.
    private readonly IChatClient _chatClient;
    private readonly IChatClient _evaluator;

    [Fact]
    public async Task SupportResponse_MeetsQualityStandards()
    {
        // Generate response
        var response = await _chatClient.CompleteAsync(new[]
        {
            new ChatMessage(ChatRole.System, "You are a customer support agent."),
            new ChatMessage(ChatRole.User, "My order is late and I'm frustrated!")
        });

        // Evaluate with LLM
        var evaluation = await _evaluator.CompleteAsync(new[]
        {
            new ChatMessage(ChatRole.System, """
                Evaluate this customer support response on these criteria:
                1. Empathy (1-5): Does it acknowledge the customer's feelings?
                2. Helpfulness (1-5): Does it offer concrete next steps?
                3. Professionalism (1-5): Is the tone appropriate?

                Respond in JSON: {"empathy": N, "helpfulness": N, "professionalism": N, "notes": "..."}
                """),
            new ChatMessage(ChatRole.User, $"Response to evaluate:\n\n{response.Message.Text}")
        }, new ChatOptions { ResponseFormat = ChatResponseFormat.Json });

        // The prompt asks for lowercase keys ("empathy") but the record's
        // properties are PascalCase; System.Text.Json is case-sensitive by
        // default, so without this option every score would bind to 0.
        var scores = JsonSerializer.Deserialize<EvaluationScores>(
            evaluation.Message.Text!,
            new JsonSerializerOptions { PropertyNameCaseInsensitive = true });

        // Guard against malformed judge output before dereferencing.
        Assert.NotNull(scores);

        // Assert minimum quality
        Assert.True(scores.Empathy >= 3, $"Empathy too low: {scores.Notes}");
        Assert.True(scores.Helpfulness >= 3, $"Helpfulness too low: {scores.Notes}");
        Assert.True(scores.Professionalism >= 4, $"Professionalism too low: {scores.Notes}");
    }
}

/// <summary>Scores (1-5 scale) returned by the LLM judge, plus free-form notes.</summary>
public record EvaluationScores(int Empathy, int Helpfulness, int Professionalism, string Notes);
Enter fullscreen mode Exit fullscreen mode

Cost Management

LLM costs can explode without proper controls.

Cost Tracking Middleware

/// <summary>
/// IChatClient middleware that converts each completion's reported token usage
/// into a dollar cost and publishes cost/token metrics.
/// </summary>
public class CostTrackingMiddleware : DelegatingChatClient
{
    private readonly ICostCalculator _calculator;
    private readonly IMetrics _metrics;
    private readonly CostTrackingOptions _options;

    public CostTrackingMiddleware(
        IChatClient inner,
        ICostCalculator calculator,
        IMetrics metrics,
        IOptions<CostTrackingOptions> options) : base(inner)
    {
        _calculator = calculator;
        _metrics = metrics;
        _options = options.Value;
    }

    public override async Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        var completion = await base.CompleteAsync(chatMessages, options, cancellationToken);

        var usage = completion.Usage;
        if (usage is null)
            return completion; // Provider reported no token counts; nothing to record.

        var model = options?.ModelId ?? _options.DefaultModel;
        var inputTokens = usage.InputTokenCount ?? 0;
        var outputTokens = usage.OutputTokenCount ?? 0;
        var callCost = _calculator.Calculate(model, inputTokens, outputTokens);

        // Dimension metrics by model and by the "service" tag on the current trace.
        _metrics.RecordCost(callCost, new Dictionary<string, object>
        {
            ["model"] = model,
            ["service"] = Activity.Current?.GetTagItem("service") ?? "unknown"
        });

        _metrics.RecordTokens(inputTokens, outputTokens, model);

        return completion;
    }
}

/// <summary>Computes the dollar cost of a single LLM call from its token counts.</summary>
public interface ICostCalculator
{
    /// <param name="model">Model identifier used to look up pricing.</param>
    /// <param name="inputTokens">Prompt tokens consumed.</param>
    /// <param name="outputTokens">Completion tokens generated.</param>
    /// <returns>Total cost in USD.</returns>
    decimal Calculate(string model, int inputTokens, int outputTokens);
}

/// <summary>
/// Cost calculator with a hard-coded OpenAI price table. Resolves versioned
/// model ids (e.g. "gpt-4o-mini-2024-07-18") via longest-prefix matching.
/// </summary>
public class OpenAICostCalculator : ICostCalculator
{
    // Prices per 1M tokens (as of 2024)
    private readonly Dictionary<string, (decimal Input, decimal Output)> _prices = new()
    {
        ["gpt-4o"] = (2.50m, 10.00m),
        ["gpt-4o-mini"] = (0.15m, 0.60m),
        ["gpt-4-turbo"] = (10.00m, 30.00m),
        ["gpt-3.5-turbo"] = (0.50m, 1.50m)
    };

    public decimal Calculate(string model, int inputTokens, int outputTokens)
    {
        var prices = ResolvePrices(model);

        var inputCost = (inputTokens / 1_000_000m) * prices.Input;
        var outputCost = (outputTokens / 1_000_000m) * prices.Output;

        return inputCost + outputCost;
    }

    // Exact id first; then longest matching prefix so dated snapshots like
    // "gpt-4o-mini-2024-07-18" use mini rates instead of silently falling
    // back to gpt-4o pricing (the previous behavior over-billed them ~16x).
    private (decimal Input, decimal Output) ResolvePrices(string model)
    {
        if (_prices.TryGetValue(model, out var exact))
            return exact;

        var bestPrefix = _prices.Keys
            .Where(k => model.StartsWith(k, StringComparison.OrdinalIgnoreCase))
            .OrderByDescending(k => k.Length)
            .FirstOrDefault();

        // Default to most common model when nothing matches.
        return bestPrefix != null ? _prices[bestPrefix] : _prices["gpt-4o"];
    }
}
Enter fullscreen mode Exit fullscreen mode

Budget Enforcement

/// <summary>
/// IChatClient middleware that refuses calls once the tenant/user/global
/// budget is exhausted, warns near the threshold, and records actual spend
/// after each successful call.
/// </summary>
public class BudgetEnforcementMiddleware : DelegatingChatClient
{
    // Pricing fallback when the request doesn't name a model.
    // Keep in sync with OpenAICostCalculator's default.
    private const string DefaultModelId = "gpt-4o";

    private readonly IBudgetService _budgetService;
    private readonly ICostCalculator _calculator;
    private readonly ILogger<BudgetEnforcementMiddleware> _logger;

    public BudgetEnforcementMiddleware(
        IChatClient inner,
        IBudgetService budgetService,
        ICostCalculator calculator,
        ILogger<BudgetEnforcementMiddleware> logger) : base(inner)
    {
        _budgetService = budgetService;
        _calculator = calculator;
        _logger = logger;
    }

    public override async Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        var budgetKey = GetBudgetKey();

        // Check budget before call.
        // NOTE(review): check-then-spend is not atomic — concurrent requests
        // can slightly overshoot the limit. Acceptable for soft budgets;
        // confirm whether IBudgetService offers an atomic reserve operation.
        var budget = await _budgetService.GetBudgetAsync(budgetKey);
        var spent = await _budgetService.GetSpentAsync(budgetKey);

        if (spent >= budget.Limit)
        {
            _logger.LogWarning(
                "Budget exceeded for {BudgetKey}. Limit: {Limit}, Spent: {Spent}",
                budgetKey, budget.Limit, spent);

            throw new BudgetExceededException(budgetKey, spent, budget.Limit);
        }

        // Warn if approaching limit. Only reachable when spent < Limit, so
        // Limit is strictly greater than a non-negative spend here and the
        // division cannot be by zero.
        var percentUsed = (spent / budget.Limit) * 100;
        if (percentUsed >= budget.WarnThreshold)
        {
            _logger.LogWarning(
                "Budget warning for {BudgetKey}: {Percent}% used",
                budgetKey, percentUsed);
        }

        // Make the call
        var result = await base.CompleteAsync(chatMessages, options, cancellationToken);

        // Record spend (only possible when the provider reports usage).
        if (result.Usage != null)
        {
            var cost = _calculator.Calculate(
                options?.ModelId ?? DefaultModelId,
                result.Usage.InputTokenCount ?? 0,
                result.Usage.OutputTokenCount ?? 0);

            await _budgetService.RecordSpendAsync(budgetKey, cost);
        }

        return result;
    }

    /// <summary>
    /// Picks the budget bucket from trace tags: tenant first, then user, then
    /// a global catch-all.
    /// </summary>
    private string GetBudgetKey()
    {
        // Use tenant, user, or service as budget key
        var userId = Activity.Current?.GetTagItem("user_id")?.ToString();
        var tenantId = Activity.Current?.GetTagItem("tenant_id")?.ToString();

        if (!string.IsNullOrEmpty(tenantId))
            return $"tenant:{tenantId}";
        if (!string.IsNullOrEmpty(userId))
            return $"user:{userId}";

        return "global";
    }
}

/// <summary>
/// Thrown when a chat completion is refused because the budget bucket
/// identified by <see cref="BudgetKey"/> has reached its spending limit.
/// </summary>
public class BudgetExceededException : Exception
{
    public BudgetExceededException(string budgetKey, decimal spent, decimal limit)
        : base($"Budget exceeded for {budgetKey}: {spent:C} of {limit:C}")
    {
        BudgetKey = budgetKey;
        Spent = spent;
        Limit = limit;
    }

    /// <summary>Budget bucket ("tenant:…", "user:…", or "global").</summary>
    public string BudgetKey { get; }

    /// <summary>Amount already spent when the call was refused (USD).</summary>
    public decimal Spent { get; }

    /// <summary>Configured spending limit for the bucket (USD).</summary>
    public decimal Limit { get; }
}
Enter fullscreen mode Exit fullscreen mode

Resilience Patterns

LLM APIs fail. Rate limits happen. Build for it.

Retry with Exponential Backoff

/// <summary>
/// IChatClient middleware that retries transient HTTP failures (429, 502,
/// 503, 504) with capped exponential backoff plus jitter.
/// </summary>
public class RetryingChatClient : DelegatingChatClient
{
    private readonly ILogger<RetryingChatClient> _logger;
    private readonly RetryOptions _options;

    public RetryingChatClient(
        IChatClient inner,
        ILogger<RetryingChatClient> logger,
        IOptions<RetryOptions> options) : base(inner)
    {
        _logger = logger;
        _options = options.Value;
    }

    public override async Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        var attempt = 0;
        var delay = _options.InitialDelay;

        while (true)
        {
            attempt++;

            try
            {
                return await base.CompleteAsync(chatMessages, options, cancellationToken);
            }
            catch (Exception ex) when (ShouldRetry(ex) && attempt < _options.MaxAttempts)
            {
                // Add up to 25% jitter so clients that were rate-limited
                // together don't retry in lockstep (thundering herd).
                var jitteredDelay = TimeSpan.FromMilliseconds(
                    delay.TotalMilliseconds * (1.0 + Random.Shared.NextDouble() * 0.25));

                _logger.LogWarning(ex,
                    "Attempt {Attempt} failed, retrying in {Delay}ms",
                    attempt, jitteredDelay.TotalMilliseconds);

                await Task.Delay(jitteredDelay, cancellationToken);

                // Exponential growth, capped at MaxDelay.
                delay = TimeSpan.FromMilliseconds(
                    Math.Min(delay.TotalMilliseconds * 2, _options.MaxDelay.TotalMilliseconds));
            }
        }
    }

    /// <summary>Retry only transient HTTP statuses; never retry cancellations.</summary>
    private static bool ShouldRetry(Exception ex)
    {
        return ex switch
        {
            HttpRequestException { StatusCode: HttpStatusCode.TooManyRequests } => true,
            HttpRequestException { StatusCode: HttpStatusCode.ServiceUnavailable } => true,
            HttpRequestException { StatusCode: HttpStatusCode.BadGateway } => true,
            HttpRequestException { StatusCode: HttpStatusCode.GatewayTimeout } => true,
            TaskCanceledException => false, // Don't retry cancellations
            _ => false
        };
    }
}

/// <summary>Tuning knobs for <see cref="RetryingChatClient"/> backoff behavior.</summary>
public class RetryOptions
{
    /// <summary>Total number of tries, including the first attempt.</summary>
    public int MaxAttempts { get; set; } = 3;

    /// <summary>Delay before the first retry; doubles on each subsequent retry.</summary>
    public TimeSpan InitialDelay { get; set; } = TimeSpan.FromSeconds(1);

    /// <summary>Upper bound on the backoff delay.</summary>
    public TimeSpan MaxDelay { get; set; } = TimeSpan.FromSeconds(30);
}
Enter fullscreen mode Exit fullscreen mode

Fallback to Alternative Providers

/// <summary>
/// Routes completions to a primary provider and falls back to a secondary one
/// on rate limiting, outage, or timeout (but not caller-requested cancellation).
/// </summary>
public class FallbackChatClient : IChatClient
{
    private readonly IChatClient _primary;
    private readonly IChatClient _fallback;
    private readonly ILogger<FallbackChatClient> _logger;

    public FallbackChatClient(
        IChatClient primary,
        IChatClient fallback,
        ILogger<FallbackChatClient> logger)
    {
        _primary = primary;
        _fallback = fallback;
        _logger = logger;
    }

    public ChatClientMetadata Metadata => _primary.Metadata;

    public async Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        try
        {
            return await _primary.CompleteAsync(chatMessages, options, cancellationToken);
        }
        catch (Exception ex) when (ShouldFallback(ex, cancellationToken))
        {
            _logger.LogWarning(ex,
                "Primary provider failed, falling back to secondary");

            return await _fallback.CompleteAsync(chatMessages, options, cancellationToken);
        }
    }

    private static bool ShouldFallback(Exception ex, CancellationToken cancellationToken)
    {
        return ex switch
        {
            HttpRequestException { StatusCode: HttpStatusCode.TooManyRequests } => true,
            HttpRequestException { StatusCode: HttpStatusCode.ServiceUnavailable } => true,
            // HttpClient timeouts surface as TaskCanceledException. Fall back
            // only when the CALLER's token isn't cancelled. (The original
            // checked CancellationToken.None, which is never cancelled, making
            // the guard a no-op — fallback would fire even on real cancellation.)
            TaskCanceledException when !cancellationToken.IsCancellationRequested => true,
            _ => false
        };
    }

    // ... implement other interface members
}
Enter fullscreen mode Exit fullscreen mode

Circuit Breaker

/// <summary>
/// IChatClient middleware that opens a Polly circuit breaker after repeated
/// HTTP failures, failing fast while the downstream provider is unhealthy.
/// </summary>
public class CircuitBreakerChatClient : DelegatingChatClient
{
    // CircuitBreakerAsync returns the async policy type (Polly.CircuitBreaker.
    // AsyncCircuitBreakerPolicy); the sync CircuitBreakerPolicy does not compile here.
    private readonly AsyncCircuitBreakerPolicy _policy;

    public CircuitBreakerChatClient(
        IChatClient inner,
        IOptions<CircuitBreakerOptions> options) : base(inner)
    {
        var opt = options.Value;

        _policy = Policy
            .Handle<HttpRequestException>()
            .CircuitBreakerAsync(
                exceptionsAllowedBeforeBreaking: opt.FailureThreshold,
                durationOfBreak: opt.BreakDuration,
                onBreak: (ex, duration) =>
                {
                    // Log circuit opened
                },
                onReset: () =>
                {
                    // Log circuit closed
                });
    }

    public override async Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        // While the circuit is open this throws BrokenCircuitException
        // immediately instead of hammering the failing provider.
        return await _policy.ExecuteAsync(async () =>
            await base.CompleteAsync(chatMessages, options, cancellationToken));
    }
}
Enter fullscreen mode Exit fullscreen mode

Health Checks

Monitor your AI services:

/// <summary>
/// Health check that sends a minimal completion ("ping", 5 output tokens max)
/// to verify provider connectivity and reports Degraded on high latency.
/// </summary>
public class ChatClientHealthCheck : IHealthCheck
{
    // Latency above this is reported as Degraded rather than Healthy.
    private const long DegradedLatencyThresholdMs = 5000;

    private readonly IChatClient _chatClient;
    private readonly ILogger<ChatClientHealthCheck> _logger;

    public ChatClientHealthCheck(IChatClient chatClient, ILogger<ChatClientHealthCheck> logger)
    {
        _chatClient = chatClient;
        _logger = logger;
    }

    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        try
        {
            var stopwatch = Stopwatch.StartNew();

            // Minimal completion to check connectivity
            var response = await _chatClient.CompleteAsync(
                new[] { new ChatMessage(ChatRole.User, "ping") },
                new ChatOptions { MaxOutputTokens = 5 },
                cancellationToken);

            stopwatch.Stop();

            var data = new Dictionary<string, object>
            {
                ["latency_ms"] = stopwatch.ElapsedMilliseconds,
                ["model"] = _chatClient.Metadata.ModelId ?? "unknown"
            };

            if (stopwatch.ElapsedMilliseconds > DegradedLatencyThresholdMs)
            {
                return HealthCheckResult.Degraded(
                    $"High latency: {stopwatch.ElapsedMilliseconds}ms",
                    data: data);
            }

            return HealthCheckResult.Healthy("Chat client responding", data);
        }
        catch (Exception ex)
        {
            // Any failure (auth, network, provider outage) marks the check unhealthy.
            _logger.LogError(ex, "Health check failed");

            return HealthCheckResult.Unhealthy(
                "Chat client unavailable",
                ex,
                new Dictionary<string, object>
                {
                    ["error"] = ex.Message
                });
        }
    }
}

// Registration
// The "ready" tag lets this check participate in readiness probes (e.g. a
// /health/ready endpoint filtered by tag).
builder.Services.AddHealthChecks()
    .AddCheck<ChatClientHealthCheck>("ai-provider", tags: new[] { "ai", "ready" });
Enter fullscreen mode Exit fullscreen mode

Production Checklist

Before going live, verify:

Observability

  • [ ] OpenTelemetry tracing configured
  • [ ] Structured logging with correlation IDs
  • [ ] Dashboards for latency, errors, token usage
  • [ ] Alerts for error rates and latency spikes

Cost Control

  • [ ] Cost tracking per request
  • [ ] Budget limits per tenant/user
  • [ ] Alerts at 80% budget consumption
  • [ ] Model selection based on task complexity

Resilience

  • [ ] Retry logic with backoff
  • [ ] Circuit breaker for cascading failures
  • [ ] Fallback providers configured
  • [ ] Timeout limits set

Security

  • [ ] Input sanitization
  • [ ] Output filtering for PII
  • [ ] Session isolation verified
  • [ ] API keys rotated and secured

Testing

  • [ ] Unit tests with mocks
  • [ ] Integration tests for critical paths
  • [ ] Evaluation tests for quality
  • [ ] Load tests for capacity planning

Conclusion

Building production AI applications is more than just API calls. It requires the same engineering rigor as any production system—observability, testing, cost management, and resilience.

The patterns in this series give you a foundation:

  1. Part 1: Provider abstraction for flexibility
  2. Part 2: Function calling for reliable actions
  3. Part 3: Conversation management for stateful interactions
  4. Part 4: Production patterns for real-world deployment

Start simple, iterate based on production feedback, and remember: the goal isn't perfect AI—it's useful AI that works reliably for your users.


This concludes the "Generative AI Patterns in C#" series. These patterns have been battle-tested in production systems. Apply them thoughtfully, and your AI applications will be ready for the real world.

Top comments (0)