Python JSON: Read, Write, Validate, and Pretty-Print (Complete Guide)
JSON is everywhere in Python automation — config files, API responses, state tracking, task queues. If you're building anything beyond a one-off script, you'll need to read it, write it, and validate it correctly.
Here's everything the json module can do, with patterns you'll actually reuse.
Free: AI Publishing Checklist — 7 steps in Python · Full pipeline: germy5.gumroad.com/l/xhxkzz (pay what you want, min $9.99)
The four functions you need
import json

# Read: file object → Python object
with open("data.json") as f:
    data = json.load(f)

# Parse: JSON string → Python object
data = json.loads('{"key": "value"}')

# Write: Python object → file
with open("output.json", "w") as f:
    json.dump(data, f, indent=2)

# Serialize: Python object → JSON string
text = json.dumps(data, indent=2)
That's the core API. load/dump for files, loads/dumps for strings. The s suffix means "string."
Type mapping: JSON → Python

| JSON | Python |
|---|---|
| object | dict |
| array | list |
| string | str |
| number (int) | int |
| number (float) | float |
| true / false | True / False |
| null | None |
import json

payload = '{"name": "Alice", "age": 30, "active": true, "score": null}'
data = json.loads(payload)

print(type(data))            # <class 'dict'>
print(type(data["age"]))     # <class 'int'>
print(type(data["active"]))  # <class 'bool'>
print(data["score"])         # None
Pattern 1: Safe file reading
import json
import os
def load_json_file(path: str, default=None):
    """Load JSON from *path*, returning *default* when the file is absent.

    Args:
        path: Filesystem path to a JSON document (read as UTF-8).
        default: Value returned when the file does not exist.

    Returns:
        The parsed JSON value, or *default* if the file is missing.

    Raises:
        ValueError: If the file exists but contains malformed JSON. The
            original JSONDecodeError is chained (``from e``) so tracebacks
            keep the full parse context.
    """
    if not os.path.exists(path):
        return default
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        # Chain the original exception instead of discarding it.
        raise ValueError(f"Invalid JSON in {path}: {e.msg} (line {e.lineno})") from e


# Usage
config = load_json_file("config.json", default={})
state = load_json_file("state.json", default={})
The default parameter lets you skip os.path.exists checks at every call site.
Pattern 2: Atomic writes (never corrupt your file)
import json
import os
import tempfile
def save_json_atomic(path: str, data: dict) -> None:
    """Write *data* to *path* as JSON, atomically.

    The payload is written to a temp file in the same directory, then
    renamed over *path* with os.replace (atomic on POSIX). If the process
    crashes mid-write, the original file is untouched. If serialization
    fails, the stray temp file is removed before the exception propagates.
    """
    # Temp file must live on the same filesystem for the rename to be atomic.
    dir_name = os.path.dirname(os.path.abspath(path))
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w",
            dir=dir_name,
            suffix=".tmp",
            delete=False,
            encoding="utf-8",
        ) as tmp:
            tmp_path = tmp.name
            json.dump(data, tmp, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)  # atomic on POSIX systems
    except BaseException:
        # Don't leak .tmp files when dump() raises (e.g. unserializable data).
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
        raise


# Never leaves you with a half-written file
save_json_atomic("state.json", {"task-01": "done", "task-02": "running"})
This pattern is critical for state files in long-running automation. A crash between open() and close() with regular writes can corrupt the file. With atomic writes, you either have the old version or the new version — never a partial write.
Pattern 3: Pretty-print for debugging
import json

data = {"tasks": [{"id": "t1", "status": "done"}, {"id": "t2", "status": "pending"}]}

# Compact output (the default) is hard to read
print(json.dumps(data))
# {"tasks": [{"id": "t1", "status": "done"}, {"id": "t2", "status": "pending"}]}

# Indented output for humans
print(json.dumps(data, indent=2))
# {
#   "tasks": [
#     {
#       "id": "t1",
#       "status": "done"
#     },
#     ...
#   ]
# }

# Stable key order -- useful for diffs and version control
print(json.dumps(data, indent=2, sort_keys=True))
Pattern 4: Handle non-serializable types
import json
from datetime import datetime, date
from decimal import Decimal
class SmartEncoder(json.JSONEncoder):
    """JSONEncoder that handles types the default encoder rejects.

    - datetime/date -> ISO-8601 string
    - Decimal       -> float (lossy beyond float precision)
    - set           -> sorted list, so the output is deterministic

    Anything else falls through to the base class, which raises TypeError.
    """

    def default(self, obj):
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, set):
            # sorted() accepts any iterable directly; no list() copy needed
            return sorted(obj)
        return super().default(obj)


data = {
    "created_at": datetime(2026, 5, 3, 10, 30),
    "price": Decimal("9.99"),
    "tags": {"python", "tutorial"},
}
print(json.dumps(data, cls=SmartEncoder, indent=2))
# {
#   "created_at": "2026-05-03T10:30:00",
#   "price": 9.99,
#   "tags": ["python", "tutorial"]
# }
Pattern 5: Validate structure before using it
When you load JSON from an API or a file you don't control, validate it before accessing nested keys:
import json
def validate_task(task: dict) -> tuple[bool, str]:
    """Check that a task dict has required fields with correct types.

    Returns (True, "") on success, or (False, reason) on the first
    missing or mistyped field.
    """
    schema = {"id": str, "name": str, "code": str}
    for key, kind in schema.items():
        if key not in task:
            return False, f"Missing required field: '{key}'"
        value = task[key]
        if not isinstance(value, kind):
            return False, f"Field '{key}' must be {kind.__name__}, got {type(value).__name__}"
    return True, ""


# Test it
good = {"id": "task-01", "name": "Print hello", "code": "print('hi')"}
bad = {"id": "task-01", "name": 42}  # wrong type for name, missing code
print(validate_task(good))  # (True, '')
print(validate_task(bad))   # (False, "Field 'name' must be str, got int")
Pattern 6: Merge and update JSON files
import json
def update_json_file(path: str, updates: dict) -> dict:
    """Load JSON from *path*, merge *updates* into it, and save it back.

    A missing file is treated as an empty object, so the first call can
    create the file. Returns the updated data.

    Raises:
        json.JSONDecodeError: If the existing file holds malformed JSON.
    """
    try:
        # Explicit encoding so the file round-trips identically everywhere,
        # matching the open() calls in Patterns 1 and 2.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        data = {}
    data.update(updates)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    return data


# Mark a task as done in a state file
update_json_file("task_state.json", {"task-01": "done"})
update_json_file("task_state.json", {"task-02": "done"})
# Result: {"task-01": "done", "task-02": "done"}
Real-world example: task queue
This is the JSON pattern powering the automation pipeline covered in detail in the tutorials below:
import json
import os
from datetime import date
QUEUE_FILE = "publish_queue.json"


def load_queue() -> dict:
    """Return the queue state, falling back to an empty structure on first run."""
    empty = {"pending": [], "published": []}
    return load_json_file(QUEUE_FILE, default=empty)
def save_queue(queue: dict) -> None:
    """Persist the queue state atomically so a crash can't corrupt the file."""
    save_json_atomic(QUEUE_FILE, queue)
def pop_next_task(queue: dict) -> dict | None:
"""Remove and return the first pending task."""
if not queue["pending"]:
return None
return queue["pending"].pop(0)
def mark_published(queue: dict, task: dict, url: str, article_id: int) -> None:
    """Move a task from pending to published.

    Copies the task, stamps it with today's date, the published URL and
    the article id, then appends it to queue["published"].
    """
    record = dict(task)
    record["date"] = str(date.today())
    record["url"] = url
    record["id"] = article_id
    queue["published"].append(record)
# Usage: drain one task from the queue and record the result
queue = load_queue()
next_task = pop_next_task(queue)
if next_task:
    # ... publish it ...
    mark_published(queue, next_task, "https://dev.to/...", 12345)
    save_queue(queue)
Common errors and fixes
json.JSONDecodeError: Expecting value
# Wrong: passing a filename instead of file contents.
# json.loads() expects a JSON document string, not a path, so this raises
# json.JSONDecodeError ("Expecting value").
data = json.loads("data.json")  # tries to parse the string "data.json" as JSON
# Right: open the file first and hand the file object to json.load()
with open("data.json") as f:
    data = json.load(f)
TypeError: Object of type X is not JSON serializable
# Wrong: datetime isn't JSON serializable by default
json.dumps({"date": datetime.now()})  # raises TypeError
# Right: convert to a string before serializing, or use a custom encoder
# (see the SmartEncoder pattern above)
json.dumps({"date": datetime.now().isoformat()})
KeyError when accessing loaded data
# Wrong: assuming keys exist
name = data["user"]["name"]  # crashes with KeyError if "user" or "name" is missing
# Right: chain .get() with defaults so missing keys fall back safely
name = data.get("user", {}).get("name", "Unknown")
json module cheat sheet
import json

# Load
with open("f.json") as f: data = json.load(f)  # file → dict
data = json.loads('{"k":"v"}')                 # string → dict
# Save
with open("f.json","w") as f: json.dump(data, f, indent=2)  # dict → file
text = json.dumps(data, indent=2, sort_keys=True)           # dict → string
# Debug
print(json.dumps(data, indent=2))        # pretty-print any dict
# Options
json.dumps(data, ensure_ascii=False)     # preserve unicode (é, ñ, etc.)
json.dumps(data, separators=(',',':'))   # most compact form (no spaces)
The state machine in the full pipeline uses load_json_file() + save_json_atomic() for every task: germy5.gumroad.com/l/xhxkzz — pay what you want, min $9.99.
Top comments (0)