Every project I've worked on eventually needs the same handful of scripts. File renaming, CSV cleaning, API fetching with retries, PDF merging — the kind of thing that's 30 minutes to write and 5 minutes to forget.
I finally sat down and wrote clean, production-ready versions. Here's the collection — I'll walk through the most useful ones with full code.
The Pattern
Every script follows the same structure:
# ── CONFIG ───────────────────────────────────────────────
SOURCE_DIR = "/path/to/folder"
OUTPUT_DIR = "/path/to/output"
# ─────────────────────────────────────────────────────────
# All the actual logic below — don't touch unless you want to
Edit the config block at the top, run the script. No CLI flags to memorize.
1. File Organizer
Sorts a directory by file extension. Run it on your Downloads folder once and it actually stays clean.
import shutil
from pathlib import Path

# ── CONFIG ───────────────────────────────────────────────
SOURCE_DIR = Path.home() / "Downloads"
OUTPUT_DIR = Path.home() / "Downloads" / "sorted"
CATEGORIES = {
    "Images": [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg", ".heic"],
    "Videos": [".mp4", ".mov", ".avi", ".mkv", ".webm"],
    "Documents": [".pdf", ".doc", ".docx", ".txt", ".md", ".xlsx", ".csv"],
    "Archives": [".zip", ".tar", ".gz", ".7z", ".rar"],
    "Code": [".py", ".js", ".ts", ".html", ".css", ".json", ".yaml"],
}
# ─────────────────────────────────────────────────────────


def organize(source_dir: Path, output_dir: Path, categories: dict) -> int:
    """Move every file in *source_dir* into a category subfolder of *output_dir*.

    Files whose extension (case-insensitive) is not listed in *categories*
    go into an "Other" folder. A name collision in the destination is
    resolved by appending a numeric suffix (photo.jpg → photo_1.jpg)
    instead of silently overwriting — shutil.move clobbers on POSIX.

    Returns the number of files moved.
    """
    ext_map = {ext: cat for cat, exts in categories.items() for ext in exts}
    output_dir.mkdir(parents=True, exist_ok=True)
    moved = 0
    for f in source_dir.iterdir():
        if not f.is_file() or f.name == ".DS_Store":
            continue
        category = ext_map.get(f.suffix.lower(), "Other")
        dest_dir = output_dir / category
        dest_dir.mkdir(exist_ok=True)
        dest = dest_dir / f.name
        counter = 1
        while dest.exists():  # never clobber an existing file
            dest = dest_dir / f"{f.stem}_{counter}{f.suffix}"
            counter += 1
        shutil.move(str(f), dest)
        moved += 1
    return moved


if __name__ == "__main__":
    print(f"Moved {organize(SOURCE_DIR, OUTPUT_DIR, CATEGORIES)} files.")
2. API Fetcher with Retry + Pagination
This is the one I reach for constantly. requests.get() tutorials always stop before the part where things actually break.
import requests
import time
from typing import Optional
# ── CONFIG ───────────────────────────────────────────────
BASE_URL = "https://api.example.com"  # API root, no trailing slash
ENDPOINT = "/items"  # path joined onto BASE_URL
API_KEY = "your-api-key"  # sent as a Bearer token on every request
PAGE_SIZE = 100  # items requested per page; also used to detect the last page
MAX_RETRIES = 3  # attempts per page before giving up
RATE_LIMIT_DELAY = 0.5 # seconds between requests
# ─────────────────────────────────────────────────────────
def fetch_page(page: int, session: requests.Session) -> Optional[dict]:
    """Fetch one page of results, retrying on rate limits and server errors.

    Makes up to MAX_RETRIES attempts: honors the Retry-After header on a
    429, uses exponential backoff (1s, 2s, 4s, ...) on 5xx responses and
    network-level failures. Other 4xx errors are re-raised immediately —
    retrying a bad request won't help.

    Returns the decoded JSON body, or None once all retries are exhausted.
    """
    headers = {"Authorization": f"Bearer {API_KEY}"}
    params = {"page": page, "limit": PAGE_SIZE}
    for attempt in range(MAX_RETRIES):
        try:
            r = session.get(f"{BASE_URL}{ENDPOINT}", headers=headers, params=params, timeout=10)
            r.raise_for_status()
            return r.json()
        except requests.exceptions.HTTPError:
            # r is always bound here: raise_for_status() only runs after
            # the response was received.
            if r.status_code == 429:  # rate limited
                # Retry-After may be delta-seconds OR an HTTP date
                # (RFC 9110); fall back to 10s rather than crash on int().
                try:
                    wait = int(r.headers.get("Retry-After", 10))
                except ValueError:
                    wait = 10
                print(f"Rate limited. Waiting {wait}s...")
                time.sleep(wait)
            elif r.status_code >= 500:
                wait = 2 ** attempt  # exponential backoff
                print(f"Server error, retry {attempt+1}/{MAX_RETRIES} in {wait}s...")
                time.sleep(wait)
            else:
                raise  # other 4xx: caller's problem, not transient
        except requests.exceptions.RequestException as e:
            # DNS failure, timeout, connection reset, etc.
            wait = 2 ** attempt
            print(f"Request failed: {e}. Retry {attempt+1}/{MAX_RETRIES} in {wait}s...")
            time.sleep(wait)
    return None  # all retries exhausted
def fetch_all() -> list:
    """Walk the paginated endpoint from page 1 and collect every item.

    Stops on a failed fetch, an empty page, or a short page (fewer than
    PAGE_SIZE items), and sleeps RATE_LIMIT_DELAY between page requests.
    """
    collected: list = []
    with requests.Session() as session:
        page = 1
        while True:
            print(f"Fetching page {page}...")
            payload = fetch_page(page, session)
            items = payload.get("items") if payload else None
            if not items:
                break  # fetch failed or nothing left
            collected.extend(items)
            if len(items) < PAGE_SIZE:
                break  # short page means this was the last one
            page += 1
            time.sleep(RATE_LIMIT_DELAY)
    return collected
# Kick off the full paginated crawl and report the total.
items = fetch_all()
print(f"Fetched {len(items)} items total.")
The key bits most tutorials skip: exponential backoff on server errors, respecting Retry-After headers, and detecting the last page without crashing on an empty response.
3. Bulk File Renamer
Rename 500 files in one command. Supports regex, prefix/suffix, and auto-numbering.
import re
from pathlib import Path

# ── CONFIG ───────────────────────────────────────────────
TARGET_DIR = "/path/to/files"
FILE_PATTERN = "*.jpg"  # glob pattern for files to rename
RENAME_MODE = "prefix"  # "regex", "prefix", "suffix", "number"
PREFIX = "photo_"  # used if RENAME_MODE = "prefix" or "number"
SUFFIX = "_final"  # used if RENAME_MODE = "suffix"
REGEX_FIND = r"\s+"  # used if RENAME_MODE = "regex"
REGEX_REPLACE = "_"
START_NUMBER = 1  # used if RENAME_MODE = "number"
DRY_RUN = True  # preview without actually renaming
# ─────────────────────────────────────────────────────────


def bulk_rename(
    target_dir=TARGET_DIR,
    file_pattern=FILE_PATTERN,
    mode=RENAME_MODE,
    prefix=PREFIX,
    suffix=SUFFIX,
    regex_find=REGEX_FIND,
    regex_replace=REGEX_REPLACE,
    start_number=START_NUMBER,
    dry_run=DRY_RUN,
) -> list:
    """Rename files matching *file_pattern* inside *target_dir*.

    Modes: "prefix"/"suffix" wrap the stem, "regex" rewrites the stem via
    re.sub, "number" produces prefix + zero-padded counter starting at
    *start_number*. Files are processed in sorted order so numbering is
    deterministic. A rename whose target already exists is skipped with a
    warning — Path.rename silently overwrites on POSIX, which is how you
    lose data with colliding regex/number outputs.

    Returns a list of (old_name, new_name) pairs that were applied (or
    would be, in dry-run mode).
    """
    folder = Path(target_dir)
    files = sorted(folder.glob(file_pattern))
    print(f"Found {len(files)} files.")
    renamed = []
    for i, f in enumerate(files, start=start_number):
        stem, ext = f.stem, f.suffix
        if mode == "prefix":
            new_name = f"{prefix}{stem}{ext}"
        elif mode == "suffix":
            new_name = f"{stem}{suffix}{ext}"
        elif mode == "regex":
            new_name = re.sub(regex_find, regex_replace, stem) + ext
        elif mode == "number":
            new_name = f"{prefix}{str(i).zfill(4)}{ext}"
        else:
            continue  # unknown mode: leave the file untouched
        target = f.parent / new_name
        if new_name != f.name and target.exists():
            print(f"  SKIP (target exists): {f.name} → {new_name}")
            continue
        print(f"  {'[DRY RUN] ' if dry_run else ''}Rename: {f.name} → {new_name}")
        if not dry_run:
            f.rename(target)
        renamed.append((f.name, new_name))
    return renamed


if __name__ == "__main__":
    bulk_rename()
    if DRY_RUN:
        print("\nDry run complete. Set DRY_RUN = False to apply.")
Always run with DRY_RUN = True first. I added that after the first time I renamed 200 files the wrong way.
4. CSV Processor
Filter, dedupe, and transform CSVs without pandas (for when pandas is overkill).
import csv
from pathlib import Path

# ── CONFIG ───────────────────────────────────────────────
INPUT_FILE = "input.csv"
OUTPUT_FILE = "output.csv"
DEDUP_COLUMN = "email"  # column to deduplicate on (None to skip)
FILTER_COLUMN = "status"  # column to filter on (None to skip)
FILTER_VALUE = "active"  # keep rows where FILTER_COLUMN == this
DROP_COLUMNS = ["internal_id", "temp_notes"]  # columns to remove
# ─────────────────────────────────────────────────────────


def process_csv(
    input_file=INPUT_FILE,
    output_file=OUTPUT_FILE,
    dedup_column=DEDUP_COLUMN,
    filter_column=FILTER_COLUMN,
    filter_value=FILTER_VALUE,
    drop_columns=DROP_COLUMNS,
) -> int:
    """Filter, dedupe, and trim *input_file*, writing the result to *output_file*.

    Keeps rows where *filter_column* equals *filter_value* (skip by passing
    None), drops later rows whose *dedup_column* matches an earlier one
    (case-insensitive, whitespace-trimmed), and removes *drop_columns*.

    The output header comes from the input header, so a valid (possibly
    empty) CSV is always written — even when every row is filtered out.
    Returns the number of data rows written.
    """
    seen = set()
    kept = []
    with open(input_file, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        # Derive output columns from the input header, not the first kept
        # row: this keeps column order stable and works with zero rows.
        fieldnames = [c for c in (reader.fieldnames or []) if c not in drop_columns]
        for row in reader:
            if filter_column and row.get(filter_column) != filter_value:
                continue
            if dedup_column:
                # `or ""` guards against None from short rows (DictReader
                # fills missing fields with None, not "").
                key = (row.get(dedup_column) or "").strip().lower()
                if key in seen:
                    continue
                seen.add(key)
            kept.append({k: v for k, v in row.items() if k not in drop_columns})
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(kept)
    return len(kept)


if __name__ == "__main__":
    n = process_csv()
    print(f"Done. {n} rows written to {OUTPUT_FILE}.")
The Rest
The other 6 in the collection:
- `web_scraper.py` — BeautifulSoup template with pagination and delay
- `email_sender.py` — SMTP email (Gmail, Outlook, any provider)
- `pdf_merger.py` — merge or split PDFs
- `dir_monitor.py` — watch a folder, trigger actions on changes
- `excel_to_json.py` — Excel/CSV → clean JSON
- `image_resizer.py` — batch resize and compress images
If you want to use them as a bundle (all 10 scripts + README), I packaged them here: Python Automation Scripts Pack ($19). Or drop a comment with which one you want and I'll paste the code.
What scripts do you find yourself rewriting constantly? Always looking for what to add next.
Top comments (0)