Python pathlib: Replace os.path with Cleaner File Handling
os.path works but it's verbose. pathlib.Path does the same job with cleaner code and fewer imports.
Here's every common file operation — before and after.
Free: AI Publishing Checklist — 7 steps in Python · Full pipeline: germy5.gumroad.com/l/xhxkzz (pay what you want, min $9.99)
The core idea
from pathlib import Path
# os.path approach (old)
import os
base = os.path.dirname(os.path.abspath(__file__))
config = os.path.join(base, "config", "settings.json")
os.makedirs(os.path.dirname(config), exist_ok=True)
# pathlib approach (modern)
from pathlib import Path
base = Path(__file__).parent
config = base / "config" / "settings.json"
config.parent.mkdir(parents=True, exist_ok=True)
The / operator joins paths. Path objects carry all the methods you need.
Anatomy of a Path object
from pathlib import Path
p = Path("/Users/yamil/projects/pipeline/tasks.json")
print(p.name) # 'tasks.json'
print(p.stem) # 'tasks'
print(p.suffix) # '.json'
print(p.parent) # /Users/yamil/projects/pipeline
print(p.parts) # ('/', 'Users', 'yamil', 'projects', 'pipeline', 'tasks.json')
print(p.is_absolute()) # True
print(p.exists()) # depends on your filesystem
Reading and writing files
from pathlib import Path
p = Path("data/state.json")
# Read text
text = p.read_text(encoding="utf-8")
# Write text
p.write_text('{"status": "done"}', encoding="utf-8")
# Read bytes
raw = p.read_bytes()
# Write bytes
p.write_bytes(b"\x89PNG...")
# Open (same as built-in open(), but no need to build the path string)
with p.open("r", encoding="utf-8") as f:
data = json.load(f)
vs os.path:
# Old
path = os.path.join("data", "state.json")
with open(path, "r", encoding="utf-8") as f:
text = f.read()
# New
p = Path("data") / "state.json"
text = p.read_text(encoding="utf-8")
Creating directories
from pathlib import Path
# Create one directory
Path("output").mkdir(exist_ok=True)
# Create nested directories
Path("output/2026/05").mkdir(parents=True, exist_ok=True)
# os.path equivalent (2 lines vs 1)
import os
os.makedirs("output/2026/05", exist_ok=True)
parents=True creates intermediate directories. exist_ok=True doesn't raise if the directory already exists.
Checking if paths exist
from pathlib import Path
p = Path("config.json")
p.exists() # True if file or directory exists
p.is_file() # True only for files
p.is_dir() # True only for directories
p.is_symlink() # True if symbolic link
# Practical example
def load_config(path: Path) -> dict:
    """Load a JSON config file, returning {} if the file does not exist.

    Explicit encoding: read_text() otherwise falls back to the
    platform default, which differs between Windows and POSIX.
    """
    if not path.exists():
        return {}
    return json.loads(path.read_text(encoding="utf-8"))
Listing directory contents
from pathlib import Path
d = Path("covers")
# All files in directory
for p in d.iterdir():
print(p.name)
# Filter by extension
png_files = [p for p in d.iterdir() if p.suffix == ".png"]
# Recursive glob — all Python files in project
py_files = list(Path(".").rglob("*.py"))
# Non-recursive glob — JSON files in current directory only
json_files = list(Path(".").glob("*.json"))
# os.listdir equivalent
files = list(Path(".").iterdir())
Building paths relative to your script
This is the most common pattern in automation scripts:
from pathlib import Path
# Path of the current script
HERE = Path(__file__).parent
# Paths relative to the script
TASKS_FILE = HERE / "tasks.json"
STATE_FILE = HERE / "state.json"
COVERS_DIR = HERE / "covers"
LOG_FILE = HERE / "logs" / "pipeline.log"
# Create directories that should exist
COVERS_DIR.mkdir(exist_ok=True)
LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
Compare with the old approach:
import os
HERE = os.path.dirname(os.path.abspath(__file__))
TASKS_FILE = os.path.join(HERE, "tasks.json")
STATE_FILE = os.path.join(HERE, "state.json")
COVERS_DIR = os.path.join(HERE, "covers")
os.makedirs(COVERS_DIR, exist_ok=True)
Renaming and moving files
from pathlib import Path
import shutil

p = Path("draft.md")

# Rename in same directory.
# rename() returns the NEW Path; rebind p — after the call, the old
# "draft.md" path no longer exists, so reusing the stale p would raise.
p = p.rename("final.md")

# Move to different directory (also works as rename) — again keep the result.
p = p.rename(Path("archive") / p.name)

# Copy (pathlib doesn't have copy — use shutil)
shutil.copy2(p, Path("backup") / p.name)

# Delete a file
p.unlink(missing_ok=True)  # missing_ok: don't raise if not found

# Delete empty directory
Path("temp_dir").rmdir()

# Delete directory and contents
shutil.rmtree(Path("temp_dir"))
Changing extensions
from pathlib import Path
p = Path("chapter_01.md")
# Change extension
epub = p.with_suffix(".epub") # chapter_01.epub
pdf = p.with_suffix(".pdf") # chapter_01.pdf
no_ext = p.with_suffix("") # chapter_01
# Change name completely
backup = p.with_name("chapter_01_backup.md")
# Add a suffix to the stem
versioned = p.with_name(f"{p.stem}_v2{p.suffix}") # chapter_01_v2.md
Real-world pattern: automation pipeline paths
This is the path setup for the ebook publishing pipeline:
from pathlib import Path
import json
class PipelinePaths:
"""Centralized path management for the publishing pipeline."""
def __init__(self, base_dir: Path | None = None):
self.base = base_dir or Path(__file__).parent
# Input
self.outline = self.base / "outline.json"
self.prompt = self.base / "PIPELINE_PROMPT_v4.md"
# Output directories
self.chapters = self.base / "chapters"
self.covers = self.base / "covers"
self.epub_dir = self.base / "epub_output"
# State
self.state_file = self.base / "pipeline_state.json"
self.queue_file = self.base / "publish_queue.json"
self.log_file = self.base / "logs" / "pipeline.log"
self._ensure_dirs()
def _ensure_dirs(self):
"""Create all required directories if they don't exist."""
for d in [self.chapters, self.covers, self.epub_dir, self.log_file.parent]:
d.mkdir(parents=True, exist_ok=True)
def chapter_file(self, chapter_id: str, lang: str = "en") -> Path:
return self.chapters / f"{chapter_id}_{lang}.md"
def cover_file(self, article_filename: str) -> Path:
stem = Path(article_filename).stem
return self.covers / f"{stem}.png"
def load_state(self) -> dict:
if not self.state_file.exists():
return {}
return json.loads(self.state_file.read_text())
def save_state(self, state: dict) -> None:
self.state_file.write_text(json.dumps(state, indent=2))
# Usage
paths = PipelinePaths()
state = paths.load_state()
chapter = paths.chapter_file("ch01")
print(f"Chapter path: {chapter}")
print(f"Exists: {chapter.exists()}")
Quick comparison: os.path vs pathlib
| Operation | os.path | pathlib |
|---|---|---|
| Join paths | os.path.join(a, b, c) |
Path(a) / b / c |
| Get filename | os.path.basename(p) |
Path(p).name |
| Get directory | os.path.dirname(p) |
Path(p).parent |
| Get extension | os.path.splitext(p)[1] |
Path(p).suffix |
| Check exists | os.path.exists(p) |
Path(p).exists() |
| Is file? | os.path.isfile(p) |
Path(p).is_file() |
| Is dir? | os.path.isdir(p) |
Path(p).is_dir() |
| Make dirs | os.makedirs(p, exist_ok=True) |
Path(p).mkdir(parents=True, exist_ok=True) |
| Absolute path | os.path.abspath(p) |
Path(p).resolve() |
| Script directory | os.path.dirname(os.path.abspath(__file__)) |
Path(__file__).parent |
| List directory | os.listdir(p) |
list(Path(p).iterdir()) |
| Read file | open(p).read() |
Path(p).read_text() |
When to still use os.path
pathlib is in the standard library since Python 3.4 and preferred for most use cases. But some situations still call for os.path:
import os
# os.path.expandvars — pathlib has no equivalent
config = os.path.expandvars("$HOME/.config/pipeline.json")
home = os.path.expanduser("~/projects")  # pathlib does cover this one: Path("~/projects").expanduser()
# os.path.commonpath — finding shared prefix
common = os.path.commonpath(["/a/b/c", "/a/b/d"]) # "/a/b"
# When passing to legacy APIs that only accept strings (not Path objects)
# (most modern Python accepts Path objects directly, but not all)
subprocess.run(["python3", str(script_path)]) # explicit str() for safety
The pipeline uses Path(__file__).parent for every file reference — no hardcoded paths, no os.path.join chains: germy5.gumroad.com/l/xhxkzz — pay what you want, min $9.99.
Top comments (0)