OpenAI's API has become the standard interface for working with large language models in production applications. Whether you're building chatbots, content generators, code assistants, or data processing pipelines, understanding how to effectively use the API is essential.
This guide covers everything from basic setup to advanced patterns for text generation with the OpenAI API.
Getting Started
Installation and Authentication
pip install openai
from openai import OpenAI
# Initialize the client. With no arguments the SDK picks the key up from the
# OPENAI_API_KEY environment variable.
client = OpenAI()

# Alternative: pass the key explicitly. Left commented out so that running
# this file top to bottom does not replace the env-configured client above
# with one that carries a placeholder key.
# client = OpenAI(api_key="your-api-key")
Your First Generation
# A minimal chat completion: the system turn sets the assistant's behaviour,
# the user turn carries the actual question.
response = client.chat.completions.create(
    temperature=0.7,
    max_tokens=500,
    model="gpt-4o",
    messages=[
        dict(role="system", content="You are a helpful assistant."),
        dict(role="user", content="Explain quantum computing in simple terms."),
    ],
)

# The generated text is on the message of the first returned choice.
print(response.choices[0].message.content)
Understanding the API Parameters
Temperature
Controls randomness in the output. The API accepts values from 0 to 2. Lower values (0-0.3) produce more focused, more repeatable responses; higher values (0.7-1.0) produce more creative, varied outputs.
# Low temperature: focused, repeatable answers for factual tasks.
factual = client.chat.completions.create(
    temperature=0.0,
    model="gpt-4o",
    messages=[dict(role="user", content="What is 2+2?")],
)

# High temperature: more varied wording for creative writing tasks.
creative = client.chat.completions.create(
    temperature=0.9,
    model="gpt-4o",
    messages=[dict(role="user", content="Write a poem about the ocean.")],
)
Max Tokens and Token Management
import tiktoken
def count_tokens(text: str, model: str = "gpt-4o") -> int:
    """Count the number of tokens in a text string.

    Args:
        text: The text to tokenize.
        model: Model name used to select the tokenizer.

    Returns:
        The number of tokens ``text`` encodes to.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken has no mapping for this model name (for example a model
        # released after the installed tiktoken version); fall back to the
        # encoding used by the gpt-4o family rather than failing outright.
        encoding = tiktoken.get_encoding("o200k_base")
    return len(encoding.encode(text))
# Rough pre-flight cost estimate: tokenize the prompt locally and multiply
# by the per-1K-token input price.
cost_per_1k_input = 0.005  # GPT-4o input price used by this example
prompt = "Summarize this article..."
token_count = count_tokens(prompt)
estimated_cost = token_count / 1000 * cost_per_1k_input
print(f"Estimated cost: ${estimated_cost:.4f}")
Response Format (Structured Output)
import json

# Structured Outputs: constrain the reply to a JSON document that matches
# the schema below, then parse it into a dict.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Extract information from the text."},
        {"role": "user", "content": "Apple was founded by Steve Jobs in 1976."},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "entity_extraction",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "company": {"type": "string"},
                    "founder": {"type": "string"},
                    "year": {"type": "integer"},
                },
                "required": ["company", "founder", "year"],
                # Strict mode requires every object in the schema to forbid
                # extra keys; without this the request is rejected.
                "additionalProperties": False,
            },
        },
    },
)

data = json.loads(response.choices[0].message.content)
# {'company': 'Apple', 'founder': 'Steve Jobs', 'year': 1976}
Advanced Patterns
System Prompt Engineering
The system prompt is your most powerful tool for controlling output quality:
SYSTEM_PROMPT = """You are an expert technical writer. Follow these rules:
1. Use clear, concise language
2. Include code examples for all technical concepts
3. Structure responses with headers and bullet points
4. When explaining errors, always show the fix
5. Add "Pro Tip" sections for advanced insights
6. Never use filler phrases like "In conclusion" or "It's important to note"
"""
def generate_technical_explanation(topic: str) -> str:
    """Ask the model to explain ``topic`` under the SYSTEM_PROMPT rules.

    Args:
        topic: Subject to explain; interpolated into the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Explain {topic}"},
    ]
    completion = client.chat.completions.create(
        temperature=0.3,
        messages=conversation,
        model="gpt-4o",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
Multi-Turn Conversations
class ConversationManager:
    """Keep a bounded multi-turn chat history and send it with each request.

    ``messages[0]`` is always the system prompt; at most ``max_history``
    later messages are kept, oldest dropped first.
    """

    def __init__(
        self,
        system_prompt: str,
        model: str = "gpt-4o",
        max_history: int = 20,
    ):
        """Create a manager with its own client and a fresh history.

        Args:
            system_prompt: Content of the system message pinned at index 0.
            model: Model name sent with every request.
            max_history: Maximum number of non-system messages to keep.

        Raises:
            ValueError: If ``max_history`` is less than 1 (the user's turn
                would be trimmed away before it could be sent).
        """
        # Validate before creating the client so bad arguments fail fast.
        if max_history < 1:
            raise ValueError("max_history must be at least 1")
        self.client = OpenAI()
        self.model = model
        self.messages = [
            {"role": "system", "content": system_prompt}
        ]
        self.max_history = max_history

    def add_message(self, role: str, content: str) -> None:
        """Append a message, then drop the oldest non-system messages if
        the history exceeds ``max_history``."""
        self.messages.append({"role": role, "content": content})
        # Trim history to manage token usage. Deleting from index 1 keeps
        # the system prompt and, unlike a ``[-max_history:]`` slice, stays
        # correct when the limit is 0 (``[-0:]`` is the whole list).
        overflow = len(self.messages) - (self.max_history + 1)
        if overflow > 0:
            del self.messages[1:1 + overflow]

    def get_response(self, user_input: str) -> str:
        """Send ``user_input`` with the current history and record the reply.

        Args:
            user_input: The user's next message.

        Returns:
            The assistant's reply text ("" if the reply has no content).

        Raises:
            Exception: Whatever the API call raises; the history is restored
                to its state before the call so a retry does not duplicate
                the user turn.
        """
        snapshot = list(self.messages)
        self.add_message("user", user_input)
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=self.messages,
                temperature=0.7
            )
        except Exception:
            # Roll back the unanswered user turn before propagating.
            self.messages = snapshot
            raise
        # Content can be None; store a string so history entries stay
        # tokenizable and the declared return type holds.
        assistant_message = response.choices[0].message.content or ""
        self.add_message("assistant", assistant_message)
        return assistant_message

    def get_token_count(self) -> int:
        """Return the summed ``count_tokens`` result over every stored
        message's content (using ``count_tokens``'s default model)."""
        return sum(count_tokens(msg["content"]) for msg in self.messages)
Streaming Responses
def stream_response(prompt: str) -> str:
    """Stream a completion for ``prompt``, echoing text as it arrives.

    Args:
        prompt: Content of the single user message sent to the model.

    Returns:
        The full response text, assembled from all streamed pieces.
    """
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True
    )
    pieces = []
    for chunk in stream:
        # Some chunks carry no choices (e.g. usage-only chunks); indexing
        # choices[0] on them would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            pieces.append(content)
            print(content, end='', flush=True)
    print()  # New line after streaming
    return "".join(pieces)
Function Calling
# Tool schema advertised to the model: one function, `get_weather`, taking a
# required `location` and an optional `unit`.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g. 'San Francisco'",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["location"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
    tools=tools,
    tool_choice="auto",
)

# Check if the model wants to call a function. It may request several calls
# in one turn, so handle every entry rather than only the first;
# `tool_calls` is empty/None when the model answered directly.
for tool_call in response.choices[0].message.tool_calls or []:
    function_name = tool_call.function.name
    # Arguments arrive as a JSON-encoded string.
    arguments = json.loads(tool_call.function.arguments)
    # Execute your function here
    print(f"Call {function_name} with args: {arguments}")
Error Handling and Retry Logic
import random
import time

from openai import APITimeoutError, RateLimitError, APIConnectionError
def robust_completion(messages, max_retries=3, **kwargs):
    """Call the API with robust error handling.

    Retries on rate limits (exponential backoff plus jitter), timeouts
    (immediate retry) and connection errors (exponential backoff).

    Args:
        messages: Chat messages to send.
        max_retries: Maximum number of attempts.
        **kwargs: Extra arguments forwarded to ``chat.completions.create``;
            ``model`` defaults to ``'gpt-4o'`` when not supplied.

    Returns:
        The completion response from the first successful attempt.

    Raises:
        APITimeoutError: If the final attempt times out.
        RuntimeError: If every attempt failed with a rate limit or
            connection error (chained from the last such error).
        Exception: Any other error is re-raised immediately.
    """
    # Pop `model` so it is not passed twice (explicitly and via **kwargs),
    # which would raise TypeError for a duplicate keyword argument.
    model = kwargs.pop('model', 'gpt-4o')
    last_error = None
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return client.chat.completions.create(
                model=model,
                messages=messages,
                **kwargs
            )
        except RateLimitError as err:
            last_error = err
            if not is_last_attempt:  # no point sleeping before giving up
                wait_time = (2 ** attempt) + random.uniform(0, 1)
                print(f"Rate limited. Waiting {wait_time:.1f}s...")
                time.sleep(wait_time)
        except APITimeoutError:
            print(f"Timeout on attempt {attempt + 1}")
            if is_last_attempt:
                raise
        except APIConnectionError as err:
            last_error = err
            print(f"Connection error on attempt {attempt + 1}")
            if not is_last_attempt:
                time.sleep(2 ** attempt)
        except Exception as e:
            print(f"Unexpected error: {e}")
            raise
    raise RuntimeError("Max retries exceeded") from last_error
For the complete guide with all code examples and advanced patterns, read the full article on our blog.
Originally published at WD Tech Blog. Follow for more Python tutorials, AI tools, and developer resources.
Top comments (0)