Building Production AI Agents with DeepSeek V4 API: A Complete 2026 Guide
DeepSeek V4 dropped in April 2026 with a 1M token context window, native MCP support, and agentic coding benchmarks that beat GPT-4o — at a fraction of the API cost. Here's how to actually build agents with it.
⚠️ Migration Warning: The old `deepseek-chat` and `deepseek-reasoner` endpoints will be deprecated on July 24, 2026. Migrate to `deepseek-v4-pro` and `deepseek-v4-r1` now.
What's New in DeepSeek V4 (Quick Recap)
| Feature | DeepSeek V4 Pro | DeepSeek V4 Flash |
|---|---|---|
| Context Window | 1M tokens | 128K tokens |
| Agentic Coding | SOTA | Fast |
| Function Calling | ✅ Native | ✅ Native |
| MCP Support | ✅ | ✅ |
| Input Price | $0.14/1M tokens | $0.07/1M tokens |
| Speed | ~45 tok/s | ~120 tok/s |
The V4 Pro model is the one you want for agents — the 1M context lets you pass your entire codebase, long conversation history, or thousands of tool call results without truncating.
Setup
pip install openai # DeepSeek uses OpenAI-compatible API
from openai import OpenAI

# DeepSeek exposes an OpenAI-compatible endpoint, so the stock OpenAI SDK
# works unmodified — only api_key and base_url differ.
client = OpenAI(
    api_key="YOUR_DEEPSEEK_API_KEY",  # platform.deepseek.com
    base_url="https://api.deepseek.com"
)
That's it — same SDK, different base_url.
1. Basic Agent with Tool Use
DeepSeek V4 supports OpenAI-style function calling natively.
from openai import OpenAI
import json

client = OpenAI(api_key="YOUR_KEY", base_url="https://api.deepseek.com")

# Define tools
# Tool declarations in the OpenAI function-calling format: each entry names a
# function, describes it for the model, and gives a JSON Schema for arguments.
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_web",
            "description": "Search the web for current information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query"},
                    # "default" is schema metadata only; the executing code
                    # must apply it itself when the model omits this field.
                    "max_results": {"type": "integer", "default": 5}
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "run_code",
            "description": "Execute Python code and return the output",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {"type": "string", "description": "Python code to run"}
                },
                "required": ["code"]
            }
        }
    }
]
def run_agent(user_message: str) -> str:
    """Drive a tool-calling loop against DeepSeek V4.

    Keeps querying the model, executing any tools it requests and feeding
    the results back, until it replies without tool calls — that reply is
    returned as the final answer.
    """
    # Dispatch table: tool name -> executor over the parsed argument dict.
    handlers = {
        "search_web": lambda a: search_web(a["query"]),
        "run_code": lambda a: execute_python(a["code"]),
    }
    conversation = [{"role": "user", "content": user_message}]
    while True:
        completion = client.chat.completions.create(
            model="deepseek-v4-pro",
            messages=conversation,
            tools=tools,
            tool_choice="auto"
        )
        reply = completion.choices[0].message
        if not reply.tool_calls:
            # No tool calls means the model produced its final answer.
            return reply.content
        # Record the assistant turn, then answer each tool call in order.
        conversation.append(reply)
        for call in reply.tool_calls:
            fn_name = call.function.name
            parsed_args = json.loads(call.function.arguments)
            if fn_name in handlers:
                outcome = handlers[fn_name](parsed_args)
            else:
                outcome = f"Unknown tool: {fn_name}"
            conversation.append({
                "role": "tool",
                "tool_call_id": call.id,
                "content": str(outcome)
            })
# Run it
# Blocks until the model stops requesting tools and returns plain text.
answer = run_agent("What's the current DeepSeek V4 API pricing?")
print(answer)
2. Reasoning Mode (R1 Style Thinking)
DeepSeek V4 includes a reasoning model (deepseek-v4-r1) that shows its thinking chain. Essential for math, coding, and multi-step planning.
# deepseek-v4-r1 returns its chain-of-thought separately from the answer.
response = client.chat.completions.create(
    model="deepseek-v4-r1",
    messages=[{
        "role": "user",
        "content": "Design a rate limiting system for a multi-agent API with 1000 agents/s peak load"
    }]
)

# Access the reasoning chain
# reasoning_content holds the thinking trace; content is the final reply.
thinking = response.choices[0].message.reasoning_content
final_answer = response.choices[0].message.content

print("=== THINKING ===")
print(thinking[:500], "...")  # preview only — reasoning chains can be long
print("\n=== ANSWER ===")
print(final_answer)
3. Long-Context Agent (1M Token Window)
The killer feature of V4 Pro. Pass your entire codebase in a single call.
import os
from pathlib import Path
def load_codebase(repo_path: str, extensions: tuple = ('.py', '.ts', '.go')) -> str:
    """Concatenate every matching source file under *repo_path* into one string.

    Each file is emitted as a fenced code block headed by its repo-relative
    path, so the model can attribute answers to specific files.

    Args:
        repo_path: Root directory to scan recursively.
        extensions: File suffixes to include (tuple default — the original
            mutable-list default is a Python anti-pattern).

    Returns:
        All matching files joined by blank lines; "" when nothing matches.
    """
    root = Path(repo_path)
    sections = []
    for ext in extensions:
        # sorted() makes the dump deterministic across filesystems.
        for fp in sorted(root.rglob(f'*{ext}')):
            try:
                content = fp.read_text(encoding='utf-8', errors='ignore')
            except OSError:
                # Unreadable entry (permissions, broken symlink) — skip it.
                continue
            # NOTE: the original article had template-engine artifacts
            # ({% raw %}/{% endraw %}) fused into this f-string, which made
            # the snippet syntactically invalid; this is the intended form.
            sections.append(
                f"### File: {fp.relative_to(repo_path)}\n```\n{content}\n```"
            )
    return "\n\n".join(sections)
def codebase_agent(repo_path: str, question: str) -> str:
    """Answer *question* about the code under *repo_path* in one V4 Pro call."""
    source_dump = load_codebase(repo_path)
    # ~4 characters per token is a rough but serviceable estimate.
    approx_tokens = len(source_dump) // 4
    print(f"Loading ~{approx_tokens:,} tokens into context...")

    prompt = f"CODEBASE:\n{source_dump}\n\nQUESTION: {question}"
    completion = client.chat.completions.create(
        model="deepseek-v4-pro",  # 1M context window
        messages=[
            {
                "role": "system",
                "content": "You are a senior software engineer. Analyze the codebase and answer questions accurately."
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=4096
    )
    return completion.choices[0].message.content
# Example: analyze your own project
# NOTE(review): the answer is only captured here, never printed.
answer = codebase_agent("./my-agent-project", "Where are the memory bottlenecks?")
4. Multi-Agent System with DeepSeek V4
Build a research → analysis → write pipeline using DeepSeek V4 as the backbone.
from dataclasses import dataclass
from typing import Optional
import json
@dataclass
class AgentMessage:
    """One message exchanged between pipeline agents.

    NOTE(review): declared but not referenced by the pipeline below.
    """
    role: str        # chat role label, e.g. "user" / "assistant"
    content: str     # the message text
    agent_name: str  # which agent produced this message
class DeepSeekAgent:
    """A single role-playing agent backed by the DeepSeek V4 chat API."""

    def __init__(self, name: str, system_prompt: str, model: str = "deepseek-v4-pro",
                 api_key: str = "YOUR_KEY", base_url: str = "https://api.deepseek.com"):
        """Create an agent with its own API client.

        Args:
            name: Human-readable agent label (not sent to the API).
            system_prompt: System message defining the agent's role.
            model: DeepSeek model id to call.
            api_key: API key — parameterized instead of hard-coded so real
                credentials can be injected; default preserves the original.
            base_url: OpenAI-compatible endpoint to call.
        """
        self.name = name
        self.model = model
        self.system_prompt = system_prompt
        self.client = OpenAI(api_key=api_key, base_url=base_url)

    def run(self, message: str, context: Optional[str] = None) -> str:
        """Send *message* (optionally preceded by upstream *context* as a
        separate user turn) and return the model's text reply."""
        messages = [{"role": "system", "content": self.system_prompt}]
        if context:
            messages.append({"role": "user", "content": f"Context from previous agent:\n{context}"})
        messages.append({"role": "user", "content": message})
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=2048
        )
        return response.choices[0].message.content
# Create the pipeline
# Three single-purpose agents: cheap Flash for broad research, Pro for the
# heavier analysis and writing stages.
researcher = DeepSeekAgent(
    name="Researcher",
    system_prompt="You research topics thoroughly and produce factual summaries with sources.",
    model="deepseek-v4-flash"  # Fast + cheap for research
)
analyst = DeepSeekAgent(
    name="Analyst",
    system_prompt="You analyze research findings and identify key insights, trends, and gaps.",
    model="deepseek-v4-pro"  # Smart for analysis
)
writer = DeepSeekAgent(
    name="Writer",
    system_prompt="You write clear, engaging technical blog posts from analysis.",
    model="deepseek-v4-pro"
)

# Run pipeline
# Each stage receives the previous stage's output via the context parameter.
topic = "Impact of MCP protocol on AI agent development in 2026"
research = researcher.run(f"Research: {topic}")
print(f"[Researcher] {research[:200]}...")
analysis = analyst.run(f"Analyze insights on: {topic}", context=research)
print(f"[Analyst] {analysis[:200]}...")
# The writer sees both the raw research and the analysis.
article = writer.run(f"Write a blog post about: {topic}", context=f"{research}\n\n{analysis}")
print(f"[Writer] Generated {len(article)} chars")
5. DeepSeek V4 + LangGraph for Production
Combine DeepSeek V4's reasoning with LangGraph's state management:
from langgraph.graph import StateGraph, END
from langchain_openai import ChatOpenAI
from typing import TypedDict

# Point LangChain at DeepSeek
# ChatOpenAI talks to any OpenAI-compatible endpoint; only key and base
# URL differ from stock OpenAI usage.
llm = ChatOpenAI(
    model="deepseek-v4-pro",
    openai_api_key="YOUR_KEY",
    openai_api_base="https://api.deepseek.com"
)
class ResearchState(TypedDict):
    """Shared state threaded through the LangGraph research pipeline."""
    query: str            # the user's research question
    research: str         # latest research output
    analysis: str         # analyst critique of the research
    final_report: str     # finished report, set by the write node
    iteration: int        # number of research passes completed
    quality_score: float  # analyst's 0-10 grade of the research
def research_node(state: ResearchState) -> ResearchState:
    """Run one research pass over the query and bump the iteration counter."""
    reply = llm.invoke(f"Research thoroughly: {state['query']}")
    next_iteration = state["iteration"] + 1
    return {"research": reply.content, "iteration": next_iteration}
def analysis_node(state: ResearchState) -> ResearchState:
    """Grade the research and attach a 0-10 quality score.

    The model is asked to embed "Score: <n>" in its reply. Parsing is
    best-effort: if the marker is missing OR the text after it is not a
    number (the original `float(...)` call crashed the whole graph in that
    case), fall back to a passing 7.0 so a chatty model can't break the run.
    """
    response = llm.invoke(f"Analyze: {state['research']}\n\nProvide quality score 0-10.")
    content = response.content
    score = 7.0  # default when no parseable "Score:" marker is present
    if "Score:" in content:
        try:
            # Original heuristic kept: first 3 chars after the last "Score:".
            score = float(content.split("Score:")[-1].strip()[:3])
        except ValueError:
            pass  # malformed number — keep the default rather than crash
    return {"analysis": content, "quality_score": score}
def should_revise(state: ResearchState) -> str:
    """Route back to research while quality is low and retries remain,
    otherwise proceed to the write node."""
    below_bar = state["quality_score"] < 7.0
    retries_left = state["iteration"] < 3
    return "research" if below_bar and retries_left else "write"
def write_node(state: ResearchState) -> ResearchState:
    """Turn the vetted analysis into the final report."""
    reply = llm.invoke(f"Write report based on: {state['analysis']}")
    return {"final_report": reply.content}
# Wire the graph: research -> analysis, then either loop back for another
# research pass or move on to write, as decided by should_revise.
graph = StateGraph(ResearchState)
graph.add_node("research", research_node)
graph.add_node("analysis", analysis_node)
graph.add_node("write", write_node)
graph.set_entry_point("research")
graph.add_edge("research", "analysis")
graph.add_conditional_edges("analysis", should_revise, {"research": "research", "write": "write"})
graph.add_edge("write", END)

app = graph.compile()
# iteration/quality_score must be seeded so the first should_revise can run.
result = app.invoke({"query": "DeepSeek V4 vs GPT-4o for coding agents", "iteration": 0, "quality_score": 0.0})
print(result["final_report"])
Cost Comparison: DeepSeek V4 vs Alternatives
| Model | Input (1M tokens) | Output (1M tokens) | Notes |
|---|---|---|---|
| DeepSeek V4 Pro | $0.14 | $0.28 | 1M ctx window |
| DeepSeek V4 Flash | $0.07 | $0.14 | 128K ctx |
| GPT-4o | $2.50 | $10.00 | 128K ctx |
| Claude 3.5 Sonnet | $3.00 | $15.00 | 200K ctx |
| Gemini 1.5 Pro | $1.25 | $5.00 | 2M ctx |
DeepSeek V4 Pro is ~18x cheaper than GPT-4o for input tokens. For agents that make hundreds of LLM calls per task, this changes the economics entirely.
Migration Checklist (Before July 24, 2026)
# OLD (will break July 24)
model = "deepseek-chat"
model = "deepseek-reasoner"

# NEW — drop-in replacements; no other request changes needed.
model = "deepseek-v4-pro"    # replaces deepseek-chat
model = "deepseek-v4-r1"     # replaces deepseek-reasoner
model = "deepseek-v4-flash"  # new: fast/cheap option
Check the full migration guide at DeepSeek Platform Docs.
Find DeepSeek, LangGraph, CrewAI, and 400+ AI agent tools at AgDex.ai.
Top comments (0)