DEV Community

Cover image for How to Use OpenAI API for Text Generation
WDSEGA
WDSEGA

Posted on

How to Use OpenAI API for Text Generation

OpenAI's API has become the standard interface for working with large language models in production applications. Whether you're building chatbots, content generators, code assistants, or data processing pipelines, understanding how to effectively use the API is essential.

This guide covers everything from basic setup to advanced patterns for text generation with the OpenAI API.

Getting Started

Installation and Authentication

pip install openai
Enter fullscreen mode Exit fullscreen mode
from openai import OpenAI

# Initialize the client with no arguments; per the inline note below, the key
# is then taken from the OPENAI_API_KEY environment variable.
client = OpenAI()  # Uses OPENAI_API_KEY env variable

# Or pass the key directly. This rebinds `client`, replacing the instance
# created above. "your-api-key" is a placeholder -- do not commit a real key
# to source control.
client = OpenAI(api_key="your-api-key")
Enter fullscreen mode Exit fullscreen mode

Your First Generation

# Conversation for the request: a system instruction followed by the
# user's question.
first_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain quantum computing in simple terms."},
]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=first_messages,
    max_tokens=500,
    temperature=0.7,
)

# The generated text is carried on the first choice's message.
first_choice = response.choices[0]
print(first_choice.message.content)
Enter fullscreen mode Exit fullscreen mode

Understanding the API Parameters

Temperature

Temperature controls randomness in the output. Lower values (0-0.3) produce more focused, near-deterministic responses; higher values (0.7-1.0) produce more creative, varied output. The API accepts values from 0 to 2.

# Factual task: temperature 0.0 keeps the output as deterministic as possible.
factual_messages = [{"role": "user", "content": "What is 2+2?"}]
factual = client.chat.completions.create(
    model="gpt-4o",
    messages=factual_messages,
    temperature=0.0,
)

# Writing task: temperature 0.9 allows more varied, creative output.
creative_messages = [{"role": "user", "content": "Write a poem about the ocean."}]
creative = client.chat.completions.create(
    model="gpt-4o",
    messages=creative_messages,
    temperature=0.9,
)
Enter fullscreen mode Exit fullscreen mode

Max Tokens and Token Management

import tiktoken

def count_tokens(text: str, model: str = "gpt-4o") -> int:
    """Count the number of tokens in a text string.

    Args:
        text: The text to tokenize.
        model: Model name used to select the tokenizer encoding.

    Returns:
        The number of tokens ``text`` encodes to.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map to an
        # encoding (for example a newly released model). Fall back to the
        # encoding used by the default model rather than failing the count.
        encoding = tiktoken.get_encoding("o200k_base")
    # By default encode() raises ValueError when the text contains a
    # special-token marker such as "<|endoftext|>". For counting arbitrary
    # text, treat such markers as ordinary characters instead.
    return len(encoding.encode(text, disallowed_special=()))

# Rough input-cost estimate, computed locally before any request is sent.
cost_per_1k_input = 0.005  # GPT-4o pricing (USD per 1,000 input tokens)

prompt = "Summarize this article..."
token_count = count_tokens(prompt)

# Scale the per-1k price by the number of thousands of tokens in the prompt.
thousands_of_tokens = token_count / 1000
estimated_cost = thousands_of_tokens * cost_per_1k_input
print(f"Estimated cost: ${estimated_cost:.4f}")
Enter fullscreen mode Exit fullscreen mode

Response Format (Structured Output)

import json

# With "strict": True, every object in the schema must list all of its
# properties in "required" and set "additionalProperties" to False; the API
# rejects the request otherwise.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Extract information from the text."},
        {"role": "user", "content": "Apple was founded by Steve Jobs in 1976."}
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "entity_extraction",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "company": {"type": "string"},
                    "founder": {"type": "string"},
                    "year": {"type": "integer"}
                },
                "required": ["company", "founder", "year"],
                "additionalProperties": False
            }
        }
    }
)

message = response.choices[0].message
if message.content is None:
    # No JSON payload came back (for example, the model refused). Report that
    # clearly instead of letting json.loads fail with an opaque TypeError.
    refusal = getattr(message, "refusal", None)
    raise ValueError(f"Model returned no structured content (refusal: {refusal!r})")

data = json.loads(message.content)
# {'company': 'Apple', 'founder': 'Steve Jobs', 'year': 1976}
Enter fullscreen mode Exit fullscreen mode

Advanced Patterns

System Prompt Engineering

The system prompt is your most powerful tool for controlling output quality:

# System message used by generate_technical_explanation(): six numbered
# style rules the model is told to follow when writing explanations.
SYSTEM_PROMPT = """You are an expert technical writer. Follow these rules:

1. Use clear, concise language
2. Include code examples for all technical concepts
3. Structure responses with headers and bullet points
4. When explaining errors, always show the fix
5. Add "Pro Tip" sections for advanced insights
6. Never use filler phrases like "In conclusion" or "It's important to note"
"""

def generate_technical_explanation(topic: str) -> str:
    """Return the model's explanation of ``topic``, written under SYSTEM_PROMPT.

    Args:
        topic: Subject inserted into the user message as "Explain <topic>".

    Returns:
        The message content of the first choice in the completion.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Explain {topic}"},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        temperature=0.3,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
Enter fullscreen mode Exit fullscreen mode

Multi-Turn Conversations

class ConversationManager:
    """Hold a bounded chat history and exchange messages with the model.

    The system prompt always stays as the first message; after it, only the
    most recent ``max_history`` messages are retained.
    """

    def __init__(self, system_prompt: str, model: str = "gpt-4o",
                 max_history: int = 20):
        """Create a conversation seeded with a system prompt.

        Args:
            system_prompt: Content of the leading system message.
            model: Model name used for completions and token counting.
            max_history: Number of non-system messages to keep.

        Raises:
            ValueError: If ``max_history`` is negative.
        """
        if max_history < 0:
            raise ValueError("max_history must be non-negative")
        self.client = OpenAI()
        self.model = model
        self.messages = [
            {"role": "system", "content": system_prompt}
        ]
        self.max_history = max_history  # Keep last N messages

    def add_message(self, role: str, content: str):
        """Append a message, then trim the oldest non-system messages."""
        self.messages.append({"role": role, "content": content})
        # Trim history to manage token usage. Deleting from index 1 keeps the
        # system prompt at index 0 and also works when max_history is 0,
        # where a negative-index slice would keep the whole list.
        excess = len(self.messages) - (self.max_history + 1)
        if excess > 0:
            del self.messages[1:1 + excess]

    def get_response(self, user_input: str) -> str:
        """Send ``user_input`` to the model and record both sides of the turn.

        If the API call raises, the history is restored to its state before
        the call, so a retry does not send the user message twice and no
        older message is lost to trimming.

        Returns:
            The assistant message content of the first choice.
        """
        snapshot = list(self.messages)
        self.add_message("user", user_input)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=self.messages,
                temperature=0.7
            )
        except Exception:
            # Undo the user message (and any trimming it triggered).
            self.messages = snapshot
            raise

        assistant_message = response.choices[0].message.content
        self.add_message("assistant", assistant_message)

        return assistant_message

    def get_token_count(self) -> int:
        """Return the total token count of all message contents.

        Only the ``content`` strings are counted, using this conversation's
        model; messages whose content is ``None`` count as zero tokens.
        """
        return sum(
            count_tokens(msg["content"] or "", self.model)
            for msg in self.messages
        )
Enter fullscreen mode Exit fullscreen mode

Streaming Responses

def stream_response(prompt: str) -> str:
    """Stream responses for real-time display.

    Each text fragment is printed as it arrives, and the concatenation of
    all fragments is returned once the stream ends.

    Args:
        prompt: User message sent as the only message of the request.

    Returns:
        The full response text assembled from the streamed fragments.
    """
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True
    )

    pieces = []
    for chunk in stream:
        # A chunk can arrive with an empty choices list (for example a
        # usage-only chunk, or on some compatible backends); indexing [0]
        # would raise IndexError, so skip those chunks.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            pieces.append(content)
            print(content, end='', flush=True)

    print()  # New line after streaming
    return "".join(pieces)
Enter fullscreen mode Exit fullscreen mode

Function Calling

# Tool definitions offered to the model: a single function, get_weather,
# taking a required location and an optional temperature unit.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g. 'San Francisco'"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
    tools=tools,
    tool_choice="auto"
)

# Check if the model wants to call a function. A single reply may request
# several calls, so handle every entry rather than only the first;
# tool_calls is None when the model answered without calling a tool.
for tool_call in response.choices[0].message.tool_calls or []:
    function_name = tool_call.function.name
    # The arguments arrive as a JSON-encoded string.
    arguments = json.loads(tool_call.function.arguments)
    # Execute your function here
    print(f"Call {function_name} with args: {arguments}")
Enter fullscreen mode Exit fullscreen mode

Error Handling and Retry Logic

import random
import time

from openai import APITimeoutError, RateLimitError, APIConnectionError

def robust_completion(messages, max_retries=3, **kwargs):
    """Call the API with robust error handling.

    Rate-limit and connection errors are retried with exponential backoff
    (rate limits also get random jitter); timeouts are retried immediately.
    Any other exception is reported and re-raised at once.

    Args:
        messages: Chat messages forwarded to the completion call.
        max_retries: Maximum number of attempts.
        **kwargs: Extra keyword arguments forwarded to the completion call.
            ``model`` defaults to ``'gpt-4o'`` when not supplied.

    Returns:
        The completion response of the first successful attempt.

    Raises:
        APITimeoutError: If the final attempt times out.
        RuntimeError: If all attempts are used up by rate-limit or
            connection errors; the last such error is attached as the cause.
    """
    # Merge the default model into the forwarded arguments once. Passing
    # model= explicitly alongside **kwargs would raise TypeError (duplicate
    # keyword) whenever the caller supplies a model.
    params = {"model": "gpt-4o", **kwargs}
    last_error = None

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return client.chat.completions.create(messages=messages, **params)

        except RateLimitError as exc:
            last_error = exc
            if is_last_attempt:
                break  # No further attempt follows, so waiting is pointless.
            wait_time = (2 ** attempt) + random.uniform(0, 1)
            print(f"Rate limited. Waiting {wait_time:.1f}s...")
            time.sleep(wait_time)

        # Keep this handler ahead of APIConnectionError so that timeouts get
        # their own handling even if the SDK derives one class from the other.
        except APITimeoutError:
            print(f"Timeout on attempt {attempt + 1}")
            if is_last_attempt:
                raise

        except APIConnectionError as exc:
            last_error = exc
            print(f"Connection error on attempt {attempt + 1}")
            if is_last_attempt:
                break
            time.sleep(2 ** attempt)

        except Exception as e:
            print(f"Unexpected error: {e}")
            raise

    raise RuntimeError("Max retries exceeded") from last_error
Enter fullscreen mode Exit fullscreen mode

For the complete guide with all code examples and advanced patterns, read the full article on our blog.


Originally published at WD Tech Blog. Follow for more Python tutorials, AI tools, and developer resources.

Top comments (0)