When you want to manipulate files and directories in Python, you typically reach for os. However, once your goal goes beyond manipulating individual paths, it's time to use shutil (shell utilities). Copying, moving, archiving and checking disk usage are all done through a very high-level API.
Let's go through everything it can do.
Why shutil Over os?
os provides you with primitives: os.rename(), os.remove(), os.mkdir(). These work, but they don't copy whole directory trees, they don't copy file metadata, and they don't compress anything.
shutil works at a higher level - directories, archives, full file trees - and it comes in the standard library. No need to install.
import shutil
Copying Files
shutil.copy() — Content + Permissions
import shutil
# Copies file content and permissions (not metadata like timestamps)
shutil.copy("source.txt", "destination.txt")
# Can also copy into a directory
shutil.copy("report.pdf", "/home/user/documents/")
shutil.copy2() — Content + Full Metadata
# Copies content + permissions + timestamps + other metadata
# Use this when you want a true duplicate
shutil.copy2("source.txt", "backup.txt")
shutil.copyfile() — Content Only
# Copies only file content — no permissions, no metadata
shutil.copyfile("source.txt", "destination.txt")
shutil.copyfileobj() — File Object to File Object
# Useful when you already have open file handles
with open("source.txt", "rb") as src, open("dest.txt", "wb") as dst:
    shutil.copyfileobj(src, dst)
# Also works with HTTP responses or any file-like object
import urllib.request
with urllib.request.urlopen("https://example.com/file.csv") as response:
    with open("file.csv", "wb") as out:
        # Streams the response body to disk in chunks instead of reading it all into memory
        shutil.copyfileobj(response, out)
Copying Entire Directory Trees
# Copy an entire directory tree
shutil.copytree("project/", "project_backup/")
# The destination must NOT exist (it will be created)
With Filters
import shutil
import os
def ignore_cache(dir, contents):
    """Ignore __pycache__ and .pyc files.

    Meant to be passed as the ``ignore=`` callback of ``shutil.copytree``.

    Args:
        dir: Directory currently being copied (unused here; it is part of
            the callback signature).
        contents: Names of the entries found in ``dir``.

    Returns:
        A list of the names in ``contents`` that are exactly
        ``"__pycache__"`` or that end in ``".pyc"``; these are skipped.
    """
    return [
        item for item in contents
        if item == "__pycache__" or item.endswith(".pyc")
    ]
shutil.copytree("myproject/", "myproject_clean/", ignore=ignore_cache)
shutil.ignore_patterns() is a shortcut for common patterns:
shutil.copytree(
"myproject/",
"myproject_clean/",
ignore=shutil.ignore_patterns("*.pyc", "__pycache__", "*.log", ".git")
)
Copy Into Existing Directory (Python 3.8+)
# dirs_exist_ok=True allows copying into an existing destination
shutil.copytree("source/", "destination/", dirs_exist_ok=True)
Moving Files and Directories
# Move a file
shutil.move("report.pdf", "/archive/2024/report.pdf")
# Move a directory (rename if on same filesystem, copy+delete otherwise)
shutil.move("old_project/", "archive/old_project/")
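shutil.move() is smarter than os.rename() — when the destination is on a different filesystem it falls back to copying and then deleting the source, and when the destination is an existing directory it moves the source inside it.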
Deleting Directory Trees
# Remove an entire directory tree (like `rm -rf`)
shutil.rmtree("old_project/")
# With error handling (note: `onerror` is deprecated since Python 3.12 in favor of `onexc`)
def handle_error(func, path, exc_info):
    """Report a failed deletion instead of letting ``shutil.rmtree`` abort.

    Usable with either rmtree callback style: ``onerror=`` passes a
    ``(type, value, traceback)`` tuple, while ``onexc=`` (Python 3.12+)
    passes the exception instance itself.

    Args:
        func: The function that raised (unused).
        path: The path that could not be removed.
        exc_info: Exception tuple (``onerror``) or exception instance
            (``onexc``).
    """
    # Pick the exception value out of the tuple form; otherwise use it as-is.
    error = exc_info[1] if isinstance(exc_info, tuple) else exc_info
    print(f"Error deleting {path}: {error}")
shutil.rmtree("old_project/", onerror=handle_error)
shutil.rmtree() is irreversible. There is no trash — it's gone. Always double-check the path.
Archiving: zip, tar, gzip, bz2
Creating Archives
# Create a zip archive
# shutil.make_archive(base_name, format, root_dir, base_dir)
shutil.make_archive(
"project_backup", # output filename (without extension)
"zip", # format: zip, tar, gztar, bztar, xztar
".", # root directory
"myproject" # directory to archive
)
# Creates: project_backup.zip
# Create a gzipped tar
shutil.make_archive("project_backup", "gztar", ".", "myproject")
# Creates: project_backup.tar.gz
Extracting Archives
# Extract any supported archive format
shutil.unpack_archive("project_backup.zip", "extracted/")
# Auto-detects format from extension
shutil.unpack_archive("project_backup.tar.gz", "extracted/")
Supported Formats
# See what formats are available on your system
print(shutil.get_archive_formats())
# [('bztar', "bzip2'ed tar-file"), ('gztar', "gzip'ed tar-file"),
# ('tar', 'uncompressed tar file'), ('xztar', "xz'ed tar-file"), ('zip', 'ZIP file')]
Disk Usage
# Check disk usage on a path
usage = shutil.disk_usage("/")
print(f"Total: {usage.total / 1e9:.1f} GB")
print(f"Used: {usage.used / 1e9:.1f} GB")
print(f"Free: {usage.free / 1e9:.1f} GB")
print(f"Usage: {usage.used / usage.total * 100:.1f}%")
Total: 500.1 GB
Used: 127.4 GB
Free: 372.7 GB
Usage: 25.5%
Finding Executables
# Like `which` in bash
python_path = shutil.which("python3")
print(python_path) # /usr/bin/python3
git_path = shutil.which("git")
print(git_path) # /usr/bin/git
# Returns None if not found
npm_path = shutil.which("npm")
if npm_path is None:
    print("npm is not installed")
Terminal Size
# Get the current terminal dimensions
size = shutil.get_terminal_size()
print(f"Columns: {size.columns}, Lines: {size.lines}")
# Columns: 220, Lines: 50
# With fallback for non-terminal environments
size = shutil.get_terminal_size(fallback=(80, 24))
Real-World Patterns
Timestamped Backups
import shutil
from datetime import datetime
def backup_directory(source: str, backup_root: str) -> str:
    """Copy ``source`` into a new timestamped directory under ``backup_root``.

    Args:
        source: Directory tree to back up.
        backup_root: Directory that holds the backups. A leading ``~`` is
            expanded to the user's home directory.

    Returns:
        Path of the backup directory that was created.

    Raises:
        FileExistsError: If a backup with the same (one-second resolution)
            timestamp already exists.
    """
    import os  # local import: this snippet only imports shutil and datetime

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # shutil does not expand "~"; without this, "~/backups" would create a
    # literal directory named "~" in the current working directory.
    backup_name = os.path.join(os.path.expanduser(backup_root), f"backup_{timestamp}")
    shutil.copytree(source, backup_name)
    print(f"Backup created: {backup_name}")
    return backup_name
backup_directory("myproject/", "~/backups")
Safe Directory Reset
import shutil
import os
def reset_directory(path: str):
    """Delete and recreate a directory cleanly.

    Args:
        path: Directory to reset. It is created if it does not exist yet.
    """
    # Try the removal and tolerate "not there" instead of checking
    # os.path.exists() first: the check-then-delete form can fail if the
    # directory disappears between the two calls.
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        pass
    os.makedirs(path)
reset_directory("build/")
reset_directory("dist/")
Deployment Script
import shutil
import os
def deploy(source: str, destination: str):
    """Copy build artifacts to deployment directory, ignoring dev files.

    Any existing ``destination`` is deleted first, so the result contains
    only what is currently in ``source``.

    Args:
        source: Directory tree to deploy.
        destination: Target directory; removed and recreated by this call.
    """
    if os.path.exists(destination):
        shutil.rmtree(destination)
    shutil.copytree(
        source,
        destination,
        ignore=shutil.ignore_patterns(
            # Patterns are matched against bare entry names, so a directory
            # must be listed as "tests" -- "tests/" would never match.
            "*.pyc", "__pycache__", "*.test.py",
            ".env", ".git", "tests", "*.md",
        ),
    )
    print(f"Deployed {source} → {destination}")
deploy("myapp/", "/var/www/myapp/")
Archive and Rotate Old Logs
import shutil
import os
from pathlib import Path
def archive_logs(log_dir: str, archive_dir: str, max_archives: int = 5):
    """Archive log directory and keep only the N most recent archives.

    Each call writes a new ``logs_<timestamp>.tar.gz`` into ``archive_dir``
    and then deletes the oldest ``*.tar.gz`` files there so that at most
    ``max_archives`` remain.

    Args:
        log_dir: Directory to archive; a trailing slash is accepted.
        archive_dir: Directory that holds the archives; created if missing.
        max_archives: Number of archives to keep. Must be at least 1.

    Returns:
        Path of the archive that was just created.

    Raises:
        ValueError: If ``max_archives`` is less than 1.
    """
    from datetime import datetime  # local import: not among this snippet's imports

    if max_archives < 1:
        # archives[:-0] is empty and a negative value would delete the wrong
        # end of the list, so reject these instead of misbehaving silently.
        raise ValueError("max_archives must be at least 1")

    os.makedirs(archive_dir, exist_ok=True)

    # Normalise before splitting: for "logs/" basename() is "" and for
    # "logs" dirname() is "", either of which archives the wrong tree.
    log_path = os.path.abspath(log_dir)

    # A unique, sortable name per run; a fixed name would overwrite the
    # previous archive and leave nothing to rotate.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")

    # Create new archive
    archive_path = shutil.make_archive(
        base_name=os.path.join(archive_dir, f"logs_{timestamp}"),
        format="gztar",
        root_dir=os.path.dirname(log_path),
        base_dir=os.path.basename(log_path),
    )
    print(f"Archived: {archive_path}")

    # Rotate — keep only max_archives most recent (timestamped names sort
    # chronologically)
    archives = sorted(Path(archive_dir).glob("*.tar.gz"))
    for old_archive in archives[:-max_archives]:
        old_archive.unlink()
        print(f"Removed old archive: {old_archive}")
    return archive_path
archive_logs("logs/", "log_archives/")
Quick Reference
| Function | What It Does |
|---|---|
| `shutil.copy(src, dst)` | Copy file with permissions |
| `shutil.copy2(src, dst)` | Copy file with full metadata |
| `shutil.copyfile(src, dst)` | Copy file content only |
| `shutil.copyfileobj(fsrc, fdst)` | Copy between file objects |
| `shutil.copytree(src, dst)` | Copy entire directory tree |
| `shutil.move(src, dst)` | Move file or directory |
| `shutil.rmtree(path)` | Delete directory tree |
| `shutil.make_archive(name, fmt, ...)` | Create zip/tar archive |
| `shutil.unpack_archive(file, dst)` | Extract archive |
| `shutil.disk_usage(path)` | Get disk usage stats |
| `shutil.which(name)` | Find executable path |
| `shutil.get_terminal_size()` | Get terminal dimensions |
Summary
shutil is one of those modules that solves dozens of problems you might otherwise tackle with subprocesses, third-party libraries, or brittle hand-crafted loops. Copying files with metadata, moving files across filesystems, copying and deleting whole directory trees, handling archives — it's all in the standard library, and it's well tested.
Top comments (0)