DEV.TO ARTICLE 46: Natural Language to Database Queries: Building SQL Copilots with Claude

Target Keyword: "natural language to sql claude"
Tags: database,sql,ai,programming,developer
Type: Tutorial

Content

Natural Language to Database Queries: Building SQL Copilots with Claude

Converting natural language questions into SQL queries is one of the most practical AI coding applications. Here's how to build a SQL copilot that understands your database schema and generates accurate queries.

Core Architecture

import json
from typing import Optional

class SQLCopilot:
    def __init__(self, api_key: str, schema: dict):
        self.api_key = api_key
        self.schema = schema
        self.system_prompt = self._build_schema_prompt()

    def _build_schema_prompt(self) -> str:
        schema_text = json.dumps(self.schema, indent=2)
        return f"""
You are an expert SQL developer. Given a database schema and a natural language question, generate an accurate SQL query.

Database Schema:
{schema_text}

Rules:
1. Always prefix column names with table name (e.g., users.id)
2. Use appropriate JOINs when querying across tables
3. Add LIMIT if the query might return many rows
4. Use aliases for table names when doing JOINs
5. For date filtering, use ISO format: 'YYYY-MM-DD'

Return ONLY the SQL query, no explanation.
"""

    async def ask(self, question: str) -> str:
        """Convert natural language to SQL."""
        response = await call_ofox_api(
            self.api_key,
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": question}
            ]
        )
        return extract_sql(response)

    async def explain(self, sql: str) -> str:
        """Explain what a SQL query does."""
        response = await call_ofox_api(
            self.api_key,
            messages=[
                {"role": "system", "content": "You are a database expert. Explain what this SQL query does in simple terms."},
                {"role": "user", "content": f"Explain this query:\n{sql}"}
            ]
        )
        return response

Defining Your Database Schema

schema = {
    "tables": {
        "users": {
            "columns": {
                "id": "INTEGER PRIMARY KEY",
                "email": "VARCHAR(255) UNIQUE NOT NULL",
                "created_at": "TIMESTAMP DEFAULT CURRENT_TIMESTAMP",
                "plan": "VARCHAR(50) DEFAULT 'free'"
            },
            "description": "Registered users of the platform"
        },
        "orders": {
            "columns": {
                "id": "INTEGER PRIMARY KEY",
                "user_id": "INTEGER REFERENCES users(id)",
                "amount": "DECIMAL(10,2)",
                "status": "VARCHAR(50)",
                "created_at": "TIMESTAMP"
            },
            "description": "Customer orders"
        },
        "products": {
            "columns": {
                "id": "INTEGER PRIMARY KEY",
                "name": "VARCHAR(255)",
                "price": "DECIMAL(10,2)",
                "category": "VARCHAR(100)"
            },
            "description": "Available products"
        },
        "order_items": {
            "columns": {
                "id": "INTEGER PRIMARY KEY",
                "order_id": "INTEGER REFERENCES orders(id)",
                "product_id": "INTEGER REFERENCES products(id)",
                "quantity": "INTEGER",
                "unit_price": "DECIMAL(10,2)"
            },
            "description": "Individual items in an order"
        }
    },
    "relationships": [
        "users.id → orders.user_id (one-to-many)",
        "orders.id → order_items.order_id (one-to-many)",
        "products.id → order_items.product_id (one-to-many)"
    ]
}

SQL Generation Examples

copilot = SQLCopilot(api_key, schema)

# Example 1
question = "Show me the top 10 customers by total spending"
sql = await copilot.ask(question)
# SELECT u.email, SUM(o.amount) as total_spent
# FROM users u
# JOIN orders o ON u.id = o.user_id
# GROUP BY u.id, u.email
# ORDER BY total_spent DESC
# LIMIT 10

# Example 2
question = "What products were ordered most in the last month?"
sql = await copilot.ask(question)
# SELECT p.name, SUM(oi.quantity) as total_quantity
# FROM products p
# JOIN order_items oi ON p.id = oi.product_id
# JOIN orders o ON oi.order_id = o.id
# WHERE o.created_at >= CURRENT_DATE - INTERVAL '1 month'
# GROUP BY p.id, p.name
# ORDER BY total_quantity DESC

# Example 3
question = "Find all users who haven't placed an order in 90 days"
sql = await copilot.ask(question)
# SELECT u.email, u.created_at
# FROM users u
# LEFT JOIN orders o ON u.id = o.user_id
# GROUP BY u.id, u.email, u.created_at
# HAVING MAX(o.created_at) < CURRENT_DATE - INTERVAL '90 days' OR COUNT(o.id) = 0

Building an Interactive SQL Editor

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()

class QueryRequest(BaseModel):
    question: str
    user_id: Optional[int] = None  # For access control

@app.post("/query")
async def natural_language_query(request: QueryRequest):
    # Verify user has access
    user = await get_user(request.user_id)
    if not user:
        raise HTTPException(status_code=403, detail="Access denied")

    # Generate SQL
    copilot = SQLCopilot(API_KEY, get_user_schema(user))
    sql = await copilot.ask(request.question)

    # Validate generated SQL (security check)
    if not is_safe_sql(sql):
        raise HTTPException(status_code=400, detail="Unsafe query")

    # Execute with read-only access
    result = await execute_readonly(sql)

    return {
        "sql": sql,
        "results": result,
        "row_count": len(result)
    }

def is_safe_sql(sql: str) -> bool:
    """Basic SQL injection prevention."""
    dangerous = ['DROP', 'DELETE', 'UPDATE', 'INSERT', 'ALTER', 'TRUNCATE', ';--']
    upper_sql = sql.upper()
    return not any(cmd in upper_sql for cmd in dangerous)

Handling Complex Queries

async def copilot_batch_questions(copilot: SQLCopilot, questions: list[str]) -> list[str]:
    """Ask multiple questions and return all SQL queries."""
    results = []
    for q in questions:
        sql = await copilot.ask(q)
        results.append({"question": q, "sql": sql})
        await asyncio.sleep(0.5)  # Rate limiting
    return results