Stop Burning Money on AI Tests: Build a Smart Mock System in 15 Minutes

I burned $3K testing AI agents before building this. Now my CI runs 200+ tests for $0. Here's the exact setup that saved my budget.


The Problem

Testing AI systems is expensive. Really expensive.

Every test run with real API calls costs money. My GitHub Actions were burning $40+ per push. Monthly bill hit $1,200 just for testing.

Sound familiar?

The Solution: Smart AI Mocking

Instead of avoiding tests (bad) or burning money (worse), build an intelligent mock system that:

  • ✅ Runs unlimited tests for $0
  • ✅ Provides deterministic responses
  • ✅ Switches seamlessly between mock/real
  • ✅ Takes 15 minutes to implement

Step 1: Create the AI Provider Interface (2 minutes)

# ai_provider.py
from abc import ABC, abstractmethod

class AIProvider(ABC):
    @abstractmethod
    def generate_response(self, prompt: str, model: str = "gpt-4") -> str:
        pass

    @abstractmethod
    def generate_structured(self, prompt: str, schema: dict) -> dict:
        pass
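
A nice property of abc here: Python enforces this contract at instantiation time, so a provider that forgets a method fails immediately rather than mid-request. A quick illustration (not part of the setup itself):

# Subclasses missing an abstract method cannot be instantiated:
class BrokenProvider(AIProvider):
    def generate_response(self, prompt: str, model: str = "gpt-4") -> str:
        return "ok"
    # generate_structured is missing

BrokenProvider()  # TypeError: Can't instantiate abstract class BrokenProvider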

Step 2: Build the Mock Provider (5 minutes)

# mock_provider.py
import re
import json
from ai_provider import AIProvider

class MockAIProvider(AIProvider):
    def __init__(self):
        # Patterns are checked in insertion order (guaranteed in
        # Python 3.7+), so keep the catch-all pattern last
        self.response_patterns = {
            # Priority calculation
            r'priority.*score': '{"priority_score": 750}',

            # Task decomposition
            r'decompose.*task': '''{"tasks": [
                {"name": "Research", "priority": "high"},
                {"name": "Analysis", "priority": "medium"}
            ]}''',

            # Team composition
            r'team.*composition': '''{"team": [
                {"name": "John", "role": "Developer"},
                {"name": "Sarah", "role": "Designer"}
            ]}''',

            # Default response
            r'.*': 'Mock response for testing purposes'
        }

    def generate_response(self, prompt: str, model: str = "gpt-4") -> str:
        prompt_lower = prompt.lower()

        for pattern, response in self.response_patterns.items():
            if re.search(pattern, prompt_lower):
                return response

        return self.response_patterns[r'.*']

    def generate_structured(self, prompt: str, schema: dict) -> dict:
        response = self.generate_response(prompt)
        try:
            return json.loads(response)
        except json.JSONDecodeError:
            return {"mock": True, "response": response}
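
If the inline pattern table grows unwieldy, one option is loading extra patterns from a JSON fixture file. Here's a minimal sketch; the fixtures/mock_responses.json path and its pattern-to-response layout are my own convention, not part of the original setup:

# fixture_provider.py -- a sketch; the fixture file format is my own
# convention: {"budget.*estimate": "{\"budget\": 5000}", ...}
import json
from pathlib import Path
from mock_provider import MockAIProvider

class FixtureMockProvider(MockAIProvider):
    def __init__(self, fixture_path: str = "fixtures/mock_responses.json"):
        super().__init__()
        path = Path(fixture_path)
        if path.exists():
            fixtures = json.loads(path.read_text())
            # Check fixture patterns before the inline defaults, and keep
            # the catch-all last so it matches only when nothing else does
            catch_all = self.response_patterns.pop(r'.*')
            merged = {**fixtures, **self.response_patterns}
            merged[r'.*'] = catch_all
            self.response_patterns = merged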

Step 3: Real Provider Implementation (3 minutes)

# openai_provider.py
import json

import openai
from ai_provider import AIProvider

class OpenAIProvider(AIProvider):
    def __init__(self, api_key: str):
        self.client = openai.OpenAI(api_key=api_key)

    def generate_response(self, prompt: str, model: str = "gpt-4") -> str:
        response = self.client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    def generate_structured(self, prompt: str, schema: dict) -> dict:
        # Add schema instruction to prompt
        schema_prompt = f"{prompt}\n\nRespond with valid JSON matching this schema: {schema}"
        response = self.generate_response(schema_prompt)
        return json.loads(response)
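
One caveat with generate_structured: models often wrap JSON in markdown fences, and json.loads raises on that. A small defensive parser helps; this hardening is my own addition, not from the original setup. (If your model and SDK version support it, OpenAI's JSON mode via response_format={"type": "json_object"} pushes the problem to the API instead.)

import json
import re

def parse_json_response(raw: str) -> dict:
    """Strip optional ```json fences before parsing model output."""
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw.strip())
    return json.loads(cleaned)

# In OpenAIProvider.generate_structured, replace json.loads(response) with:
#     return parse_json_response(response)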

Step 4: Smart Factory Pattern (3 minutes)

# ai_factory.py
import os
from mock_provider import MockAIProvider
from openai_provider import OpenAIProvider

class AIFactory:
    @staticmethod
    def create_provider():
        if os.getenv("TESTING") == "true":
            return MockAIProvider()

        if os.getenv("CI") == "true":
            return MockAIProvider()  # Never spend money in CI

        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY required for production")

        return OpenAIProvider(api_key)

# Usage in your code
ai_provider = AIFactory.create_provider()
response = ai_provider.generate_response("What is the priority of this task?")
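
Before wiring this into anything, a throwaway script makes a handy sanity check of the switching logic (assumes the modules above are importable):

# check_factory.py -- quick sanity check, run once and delete
import os
from ai_factory import AIFactory
from mock_provider import MockAIProvider

os.environ["TESTING"] = "true"
assert isinstance(AIFactory.create_provider(), MockAIProvider)
os.environ.pop("TESTING")

os.environ["CI"] = "true"
assert isinstance(AIFactory.create_provider(), MockAIProvider)
os.environ.pop("CI")

print("Factory switching works")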

Step 5: Test Configuration (2 minutes)

# test_ai_agents.py
import os
import pytest

@pytest.fixture(autouse=True)
def setup_test_environment():
    os.environ["TESTING"] = "true"
    yield
    os.environ.pop("TESTING", None)

def test_task_prioritization():
    from ai_factory import AIFactory

    ai = AIFactory.create_provider()
    response = ai.generate_structured(
        "Calculate priority score for this task",
        {"priority_score": "number"}
    )

    assert "priority_score" in response
    assert isinstance(response["priority_score"], int)
    assert response["priority_score"] == 750  # Deterministic!

def test_team_composition():
    from ai_factory import AIFactory

    ai = AIFactory.create_provider()
    response = ai.generate_structured(
        "Compose a team for this project",
        {"team": "array"}
    )

    assert "team" in response
    assert len(response["team"]) >= 2
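
When you do want an occasional real-API smoke test (say, nightly), make it opt-in so it never runs on a normal push. The RUN_LIVE_TESTS variable below is my own convention, not something the factory reads:

# test_live_smoke.py -- opt-in real-API test; RUN_LIVE_TESTS is my own convention
import os
import pytest

@pytest.mark.skipif(
    os.getenv("RUN_LIVE_TESTS") != "true",
    reason="Set RUN_LIVE_TESTS=true to hit the real API",
)
def test_live_response_is_nonempty():
    from openai_provider import OpenAIProvider

    provider = OpenAIProvider(api_key=os.environ["OPENAI_API_KEY"])
    response = provider.generate_response("Reply with the single word OK")
    assert response and response.strip()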

The Results

Before this setup:

  • 💸 $40 per CI run
  • 🐌 3-5 minutes per test suite
  • 🎲 Flaky, non-deterministic tests
  • 😰 Scared to run tests frequently

After this setup:

  • 💰 $0 for unlimited test runs
  • ⚡ 30 seconds per test suite
  • 🎯 Deterministic, reliable tests
  • 😎 Test-driven development restored

Production Usage

# In production
os.environ["TESTING"] = "false"  # Uses real OpenAI

# In CI/CD  
os.environ["CI"] = "true"  # Uses mocks

# In development
# No env vars = uses real API for manual testing
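
On the test side, you don't need to repeat the Step 5 fixture in every file: pytest picks up a root-level conftest.py automatically, so the environment setup can live there once:

# conftest.py -- pytest applies this to the whole suite
import os
import pytest

@pytest.fixture(autouse=True)
def force_mock_provider():
    os.environ["TESTING"] = "true"
    yield
    os.environ.pop("TESTING", None)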

Advanced: Smart Response Evolution

Make your mocks smarter over time by tracking which prompts hit which canned responses:

class SmartMockProvider(MockAIProvider):
    def __init__(self):
        super().__init__()
        self.response_history = []

    def generate_response(self, prompt: str, model: str = "gpt-4") -> str:
        # Record every prompt and the canned response it triggered
        response = super().generate_response(prompt, model)
        self.response_history.append((prompt, response))
        return response

    def export_history(self):
        """Review which prompts hit which patterns, then refine the patterns."""
        return self.response_history
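
The class above tells you which prompts hit which patterns, but nothing about real output. To learn what real responses look like, one option (a sketch under my own naming) is wrapping the real provider and dumping its traffic, which you can then turn into new mock patterns:

# recording_provider.py -- a sketch; class and file names are my own
import json
from ai_provider import AIProvider

class RecordingProvider(AIProvider):
    """Wraps a real provider and captures its traffic for fixture-building."""

    def __init__(self, inner: AIProvider):
        self.inner = inner
        self.history = []

    def generate_response(self, prompt: str, model: str = "gpt-4") -> str:
        response = self.inner.generate_response(prompt, model)
        self.history.append({"prompt": prompt, "response": response})
        return response

    def generate_structured(self, prompt: str, schema: dict) -> dict:
        result = self.inner.generate_structured(prompt, schema)
        self.history.append({"prompt": prompt, "response": result})
        return result

    def dump(self, path: str = "recorded_responses.json"):
        # Write captured traffic to disk to seed new mock patterns
        with open(path, "w") as f:
            json.dump(self.history, f, indent=2)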

Your Turn

Clone this pattern for your AI tests. It takes 15 minutes and saves hundreds of dollars.

Questions:

  • What's your current testing budget for AI systems?
  • Have you tried other mocking approaches? How did they work?
  • What response patterns would you add to the mock provider?

Drop your own cost-saving testing patterns below! 👇


Want more AI engineering patterns? I've documented 42+ lessons building production AI systems - including the $3K mistake that taught me this lesson.
