Building Test Preparation Tools with LLMs

#learnai #oxlo #ai

We are building an adaptive test preparation tutor that generates practice questions, grades answers, and builds a personalized review schedule based on weak areas. It is a single Python script that uses Oxlo.ai's request-based API, so long prompts with full session history do not drive up cost. This is useful for students, certification candidates, or anyone building structured study tools.

What you'll need

Python 3.10 or higher
An Oxlo.ai API key from https://portal.oxlo.ai
The OpenAI SDK: pip install openai

The system prompt

Every call uses the same system prompt so the model stays in character and returns predictable JSON. I define it once at the top of the script.

# Shared system prompt: instructs the model to answer in JSON and lists the
# fields expected for each of the three tasks (question generation, answer
# evaluation, review scheduling).
SYSTEM_PROMPT = """You are an adaptive test preparation tutor. Follow the user's instruction exactly and return valid JSON.

When generating a question, return: question (string), options (list of 4 strings), correct_index (integer 0-3), explanation (string), concept_tag (string).

When evaluating an answer, return: is_correct (boolean), feedback (string), weak_concept (string or null).

When creating a review schedule, return: schedule (list of objects with concept and review_in_days)."""

Step 1: Generate targeted practice questions

I start with a function that asks the model for a multiple-choice question on a given topic. Forcing JSON output keeps the rest of the script simple and parser-safe.

import json
from openai import OpenAI

# OpenAI SDK client pointed at the Oxlo.ai endpoint. The api_key value is a
# placeholder string and must be replaced with a real key before running.
client = OpenAI(base_url="https://api.oxlo.ai/v1", api_key="YOUR_OXLO_API_KEY")

# Shared system prompt: instructs the model to answer in JSON and lists the
# fields expected for each of the three tasks (question generation, answer
# evaluation, review scheduling).
SYSTEM_PROMPT = """You are an adaptive test preparation tutor. Follow the user's instruction exactly and return valid JSON.

When generating a question, return: question (string), options (list of 4 strings), correct_index (integer 0-3), explanation (string), concept_tag (string).

When evaluating an answer, return: is_correct (boolean), feedback (string), weak_concept (string or null).

When creating a review schedule, return: schedule (list of objects with concept and review_in_days)."""

def generate_question(topic, difficulty="medium"):
    """Ask the model for one multiple-choice question and return it parsed.

    Args:
        topic: Subject the question should cover; interpolated into the prompt.
        difficulty: Difficulty label interpolated into the prompt.

    Returns:
        The result of ``json.loads`` on the first choice's message content.
        The system prompt asks the model for the keys ``question``,
        ``options``, ``correct_index``, ``explanation`` and ``concept_tag``,
        but nothing here validates that they are present.
    """
    prompt = (
        f"Generate one {difficulty} multiple-choice question about: {topic}.\n"
        "Return only the JSON question object."
    )
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="llama-3.3-70b",
        messages=conversation,
        # Request a JSON object so the reply can be fed straight to json.loads.
        response_format={"type": "json_object"},
    )
    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

if __name__ == "__main__":
    # Demo run: fetch one medium-difficulty subnetting question and
    # pretty-print the parsed JSON.
    sample_question = generate_question("TCP/IP subnetting", "medium")
    pretty = json.dumps(sample_question, indent=2)
    print(pretty)

Step 2: Evaluate answers with concept tagging

Next, I send the user's choice back to the model along with the correct answer. The response includes targeted feedback and tags the weak concept so the script can track it later.

import json
from openai import OpenAI

# OpenAI SDK client pointed at the Oxlo.ai endpoint. The api_key value is a
# placeholder string and must be replaced with a real key before running.
client = OpenAI(base_url="https://api.oxlo.ai/v1", api_key="YOUR_OXLO_API_KEY")

# Shared system prompt: instructs the model to answer in JSON and lists the
# fields expected for each of the three tasks (question generation, answer
# evaluation, review scheduling).
SYSTEM_PROMPT = """You are an adaptive test preparation tutor. Follow the user's instruction exactly and return valid JSON.

When generating a question, return: question (string), options (list of 4 strings), correct_index (integer 0-3), explanation (string), concept_tag (string).

When evaluating an answer, return: is_correct (boolean), feedback (string), weak_concept (string or null).

When creating a review schedule, return: schedule (list of objects with concept and review_in_days)."""

def evaluate_answer(question_data, user_choice_index):
    """Send the user's choice to the model and return its parsed evaluation.

    Args:
        question_data: Mapping that must contain the keys ``question``,
            ``correct_index``, ``explanation`` and ``concept_tag``; a missing
            key raises ``KeyError`` while the prompt is being built.
        user_choice_index: Index of the option the user picked; interpolated
            into the prompt as-is.

    Returns:
        The result of ``json.loads`` on the first choice's message content.
        The system prompt asks the model for ``is_correct``, ``feedback`` and
        ``weak_concept``, but nothing here validates that they are present.
    """
    prompt_lines = [
        "Evaluate this answer.",
        f"Question: {question_data['question']}",
        f"User selected index: {user_choice_index}",
        f"Correct index: {question_data['correct_index']}",
        f"Explanation: {question_data['explanation']}",
        f"Concept tag: {question_data['concept_tag']}",
        "",  # blank line separating the data from the final instruction
        "Return only the JSON evaluation object.",
    ]
    prompt = "\n".join(prompt_lines)
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="llama-3.3-70b",
        messages=conversation,
        # Request a JSON object so the reply can be fed straight to json.loads.
        response_format={"type": "json_object"},
    )
    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

if __name__ == "__main__":
    # Hand-written question so the evaluator can be tried without first
    # calling the question generator.
    demo_question = dict(
        question="Which layer of the OSI model handles routing?",
        options=["Transport", "Network", "Data Link", "Session"],
        correct_index=1,
        explanation="The Network layer (Layer 3) handles routing and logical addressing.",
        concept_tag="osi-model",
    )
    # Index 0 ("Transport") differs from correct_index, i.e. a wrong answer.
    evaluation = evaluate_answer(demo_question, 0)
    print(json.dumps(evaluation, indent=2))

Step 3: Build an adaptive study loop

Now I wire the pieces together into a loop that maintains a performance history. After each question, the script counts incorrect answers per concept. When requesting the next question, it passes these weak areas to the model so the tutor can target them.

import json

from openai import OpenAI

# OpenAI SDK client pointed at the Oxlo.ai endpoint. The api_key value is a
# placeholder string and must be replaced with a real key before running.
client = OpenAI(base_url="https://api.oxlo.ai/v1", api_key="YOUR_OXLO_API_KEY")

# Shared system prompt: instructs the model to answer in JSON and lists the
# fields expected for each of the three tasks (question generation, answer
# evaluation, review scheduling).
SYSTEM_PROMPT = """You are an adaptive test preparation tutor. Follow the user's instruction exactly and return valid JSON.

When generating a question, return: question (string), options (list of 4 strings), correct_index (integer 0-3), explanation (string), concept_tag (string).

When evaluating an answer, return: is_correct (boolean), feedback (string), weak_concept (string or null).

When creating a review schedule, return: schedule (list of objects with concept and review_in_days)."""

def generate_question(topic, difficulty="medium", weak_concepts=None):
    """Ask the model for one multiple-choice question, optionally targeted.

    Args:
        topic: Subject the question should cover; interpolated into the prompt.
        difficulty: Difficulty label interpolated into the prompt.
        weak_concepts: Optional iterable of concept names (strings). When it
            is non-empty, a leading sentence asks the model to target one of
            them; when it is ``None`` or empty, no such sentence is added.

    Returns:
        The result of ``json.loads`` on the first choice's message content.
        The system prompt asks the model for the keys ``question``,
        ``options``, ``correct_index``, ``explanation`` and ``concept_tag``,
        but nothing here validates that they are present.
    """
    prompt_parts = []
    if weak_concepts:
        struggling = ", ".join(weak_concepts)
        prompt_parts.append(
            f"The user is struggling with: {struggling}. Target one of these concepts.\n"
        )
    prompt_parts.append(
        f"Generate one {difficulty} multiple-choice question about: {topic}.\n"
    )
    prompt_parts.append("Return only the JSON question object.")
    prompt = "".join(prompt_parts)

    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="llama-3.3-70b",
        messages=conversation,
        # Request a JSON object so the reply can be fed straight to json.loads.
        response_format={"type": "json_object"},
    )
    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

def evaluate_answer(question_data, user_choice_index):
    """Send the user's choice to the model and return its parsed evaluation.

    Args:
        question_data: Mapping that must contain the keys ``question``,
            ``correct_index``, ``explanation`` and ``concept_tag``; a missing
            key raises ``KeyError`` while the prompt is being built.
        user_choice_index: Index of the option the user picked; interpolated
            into the prompt as-is.

    Returns:
        The result of ``json.loads`` on the first choice's message content.
        The system prompt asks the model for ``is_correct``, ``feedback`` and
        ``weak_concept``, but nothing here validates that they are present.
    """
    # Each section of the prompt is separated by one blank line.
    prompt_sections = [
        "Evaluate this answer.",
        f"Question: {question_data['question']}",
        f"User selected index: {user_choice_index}",
        f"Correct index: {question_data['correct_index']}",
        f"Explanation: {question_data['explanation']}",
        f"Concept tag: {question_data['concept_tag']}",
        "Return only the JSON evaluation object.",
    ]
    prompt = "\n\n".join(prompt_sections)
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="llama-3.3-70b",
        messages=conversation,
        # Request a JSON object so the reply can be fed straight to json.loads.
        response_format={"type": "json_object"},
    )
    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

if name == "main":

    history = {}

    topic = "Computer Networking"

q = generate_question(topic, "medium")
print(f"Q: {q['question']}")
# Simulate a wrong answer
result = evaluate_answer(q, 2)
print(f"Feedback: {result['feedback']}")
if not result['is_correct'] and result.get('weak_concept'):
    history[result['weak_concept']] = history.get(result['weak_concept'], 0) + 1

weak = [c for c, count in history.items() if count