DEV Community

Otto Brennan
Otto Brennan

Posted on

10 Python Scripts I Keep Rewriting (So I Finally Made Them Good)

Every project I've worked on eventually needs the same handful of scripts. File renaming, CSV cleaning, API fetching with retries, PDF merging — the kind of thing that's 30 minutes to write and 5 minutes to forget.

I finally sat down and wrote clean, production-ready versions. Here's the collection — I'll walk through the most useful ones with full code.

The Pattern

Every script follows the same structure:

# ── CONFIG ───────────────────────────────────────────────
SOURCE_DIR = "/path/to/folder"
OUTPUT_DIR = "/path/to/output"
# ─────────────────────────────────────────────────────────

# All the actual logic below — don't touch unless you want to
Enter fullscreen mode Exit fullscreen mode

Edit the config block at the top, run the script. No CLI flags to memorize.


1. File Organizer

Sorts a directory by file extension. Run it on your Downloads folder once and it actually stays clean.

import shutil
from pathlib import Path

# ── CONFIG ───────────────────────────────────────────────
SOURCE_DIR = Path.home() / "Downloads"
OUTPUT_DIR = Path.home() / "Downloads" / "sorted"
CATEGORIES = {
    "Images":    [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg", ".heic"],
    "Videos":    [".mp4", ".mov", ".avi", ".mkv", ".webm"],
    "Documents": [".pdf", ".doc", ".docx", ".txt", ".md", ".xlsx", ".csv"],
    "Archives":  [".zip", ".tar", ".gz", ".7z", ".rar"],
    "Code":      [".py", ".js", ".ts", ".html", ".css", ".json", ".yaml"],
}
# ─────────────────────────────────────────────────────────


def organize(source_dir=SOURCE_DIR, output_dir=OUTPUT_DIR, categories=CATEGORIES):
    """Sort the files in *source_dir* into per-category subfolders of *output_dir*.

    Files whose extension is not listed in *categories* go into "Other".
    Subdirectories (including *output_dir* itself, if nested) are skipped
    because only regular files are moved.

    Returns the number of files moved.
    """
    # Invert {category: [exts]} into {ext: category} for O(1) lookups.
    ext_map = {ext: cat for cat, exts in categories.items() for ext in exts}
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    moved = 0
    for f in Path(source_dir).iterdir():
        if not f.is_file() or f.name == ".DS_Store":
            continue
        dest_dir = output_dir / ext_map.get(f.suffix.lower(), "Other")
        dest_dir.mkdir(exist_ok=True)
        # Fix: shutil.move silently overwrites an existing destination file.
        # Resolve name collisions by appending a numeric suffix instead.
        dest = dest_dir / f.name
        counter = 1
        while dest.exists():
            dest = dest_dir / f"{f.stem}_{counter}{f.suffix}"
            counter += 1
        shutil.move(str(f), dest)
        moved += 1
    return moved


if __name__ == "__main__":
    print(f"Moved {organize()} files.")
Enter fullscreen mode Exit fullscreen mode

2. API Fetcher with Retry + Pagination

This is the one I reach for constantly. requests.get() tutorials always stop before the part where things actually break.

import requests
import time
from typing import Optional

# ── CONFIG ───────────────────────────────────────────────
BASE_URL = "https://api.example.com"
ENDPOINT = "/items"
API_KEY = "your-api-key"
PAGE_SIZE = 100
MAX_RETRIES = 3
RATE_LIMIT_DELAY = 0.5  # seconds between requests
# ─────────────────────────────────────────────────────────

def fetch_page(page: int, session: requests.Session) -> Optional[dict]:
    """Fetch one page of results, retrying transient failures.

    Retries up to MAX_RETRIES times with exponential backoff on 5xx and
    connection errors, and honors the Retry-After header on 429.  Other 4xx
    errors are re-raised immediately (retrying cannot help).

    Returns the parsed JSON body, or None if every attempt failed.
    """
    headers = {"Authorization": f"Bearer {API_KEY}"}
    params = {"page": page, "limit": PAGE_SIZE}

    for attempt in range(MAX_RETRIES):
        try:
            r = session.get(f"{BASE_URL}{ENDPOINT}", headers=headers, params=params, timeout=10)
            r.raise_for_status()
            return r.json()
        except requests.exceptions.HTTPError:
            if r.status_code == 429:  # rate limited
                # Fix: Retry-After may also be an HTTP-date (RFC 9110), which
                # made int() raise ValueError.  Fall back to 10s in that case.
                try:
                    wait = int(r.headers.get("Retry-After", 10))
                except ValueError:
                    wait = 10
                print(f"Rate limited. Waiting {wait}s...")
                time.sleep(wait)
            elif r.status_code >= 500:
                wait = 2 ** attempt
                print(f"Server error, retry {attempt+1}/{MAX_RETRIES} in {wait}s...")
                time.sleep(wait)
            else:
                raise  # non-retryable 4xx: surface it to the caller
        except requests.exceptions.RequestException as e:
            wait = 2 ** attempt
            print(f"Request failed: {e}. Retry {attempt+1}/{MAX_RETRIES} in {wait}s...")
            time.sleep(wait)
    # Fix: exhaustion was silent, indistinguishable from "no more pages".
    print(f"Giving up on page {page} after {MAX_RETRIES} attempts.")
    return None

def fetch_all() -> list:
    """Walk every page of the endpoint and return the combined item list.

    Stops when a page fails, comes back empty, or is shorter than PAGE_SIZE
    (which marks the final page).  A small delay between requests keeps us
    under the server's rate limit.
    """
    collected: list = []
    with requests.Session() as session:
        page = 1
        while True:
            print(f"Fetching page {page}...")
            payload = fetch_page(page, session)
            batch = (payload or {}).get("items") or []
            if not batch:
                break
            collected.extend(batch)
            if len(batch) < PAGE_SIZE:
                # A short page can only be the last one.
                break
            page += 1
            time.sleep(RATE_LIMIT_DELAY)
    return collected

# Script entry point: pull every available page and report the grand total.
items = fetch_all()
print(f"Fetched {len(items)} items total.")
Enter fullscreen mode Exit fullscreen mode

The key bits most tutorials skip: exponential backoff on server errors, respecting Retry-After headers, and detecting the last page without crashing on an empty response.


3. Bulk File Renamer

Rename 500 files in one command. Supports regex, prefix/suffix, and auto-numbering.

import re
from pathlib import Path

# ── CONFIG ───────────────────────────────────────────────
TARGET_DIR = "/path/to/files"
FILE_PATTERN = "*.jpg"        # glob pattern for files to rename
RENAME_MODE = "prefix"        # "regex", "prefix", "suffix", "number"
PREFIX = "photo_"             # used if RENAME_MODE = "prefix" or "number"
SUFFIX = "_final"             # used if RENAME_MODE = "suffix"
REGEX_FIND = r"\s+"           # used if RENAME_MODE = "regex"
REGEX_REPLACE = "_"
START_NUMBER = 1              # used if RENAME_MODE = "number"
DRY_RUN = True                # preview without actually renaming
# ─────────────────────────────────────────────────────────


def rename_files(target_dir=TARGET_DIR, file_pattern=FILE_PATTERN,
                 mode=RENAME_MODE, prefix=PREFIX, suffix=SUFFIX,
                 regex_find=REGEX_FIND, regex_replace=REGEX_REPLACE,
                 start_number=START_NUMBER, dry_run=DRY_RUN):
    """Rename every file matching *file_pattern* in *target_dir*.

    *mode* selects the strategy: "prefix"/"suffix" add text around the stem,
    "regex" applies re.sub to the stem, "number" produces zero-padded
    sequential names; any other value leaves files untouched.  With
    *dry_run* the planned renames are only printed.

    Returns a list of (old_name, new_name) pairs that were (or would be)
    applied.
    """
    folder = Path(target_dir)
    files = sorted(folder.glob(file_pattern))
    print(f"Found {len(files)} files.")

    renamed = []
    for i, f in enumerate(files, start=start_number):
        stem, ext = f.stem, f.suffix
        if mode == "prefix":
            new_name = f"{prefix}{stem}{ext}"
        elif mode == "suffix":
            new_name = f"{stem}{suffix}{ext}"
        elif mode == "regex":
            new_name = re.sub(regex_find, regex_replace, stem) + ext
        elif mode == "number":
            new_name = f"{prefix}{str(i).zfill(4)}{ext}"
        else:
            continue

        target = f.parent / new_name
        # Fix: Path.rename silently overwrites an existing target on POSIX.
        # Never clobber another file.
        if target.exists() and target != f:
            print(f"  SKIP (exists): {f.name} -> {new_name}")
            continue

        # Fix: the preview line printed "{old}{new}" with no separator.
        print(f"  {'[DRY RUN] ' if dry_run else ''}Rename: {f.name} -> {new_name}")
        if not dry_run:
            f.rename(target)
        renamed.append((f.name, new_name))

    if dry_run:
        print("\nDry run complete. Set DRY_RUN = False to apply.")
    return renamed


if __name__ == "__main__":
    rename_files()
Enter fullscreen mode Exit fullscreen mode

Always run with DRY_RUN = True first. I added that after the first time I renamed 200 files the wrong way.


4. CSV Processor

Filter, dedupe, and transform CSVs without pandas (for when pandas is overkill).

import csv
from pathlib import Path

# ── CONFIG ───────────────────────────────────────────────
INPUT_FILE = "input.csv"
OUTPUT_FILE = "output.csv"
DEDUP_COLUMN = "email"        # column to deduplicate on (None to skip)
FILTER_COLUMN = "status"      # column to filter on (None to skip)
FILTER_VALUE = "active"       # keep rows where FILTER_COLUMN == this
DROP_COLUMNS = ["internal_id", "temp_notes"]  # columns to remove
# ─────────────────────────────────────────────────────────


def process_csv(input_file=INPUT_FILE, output_file=OUTPUT_FILE,
                dedup_column=DEDUP_COLUMN, filter_column=FILTER_COLUMN,
                filter_value=FILTER_VALUE, drop_columns=DROP_COLUMNS):
    """Filter, deduplicate, and drop columns from a CSV.

    Keeps rows where *filter_column* equals *filter_value* (skipped when
    *filter_column* is falsy), drops duplicates by a case-insensitive,
    trimmed *dedup_column* key, and removes *drop_columns* from the output.

    Returns the number of data rows written to *output_file*.
    """
    drop = set(drop_columns or [])
    seen = set()
    kept = []

    with open(input_file, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        # Fix: fieldnames came from kept[0].keys(), so an all-filtered input
        # produced no output file at all.  Take them from the header instead.
        fieldnames = [c for c in (reader.fieldnames or []) if c not in drop]
        for row in reader:
            if filter_column and row.get(filter_column) != filter_value:
                continue
            if dedup_column:
                # Fix: DictReader fills missing trailing fields with None,
                # which crashed .strip(); coerce to "" first.
                key = (row.get(dedup_column) or "").strip().lower()
                if key in seen:
                    continue
                seen.add(key)
            kept.append({k: v for k, v in row.items() if k not in drop})

    # Always write the output — an empty result still gets its header row.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(kept)

    print(f"Done. {len(kept)} rows written to {output_file}.")
    return len(kept)


if __name__ == "__main__":
    process_csv()
Enter fullscreen mode Exit fullscreen mode

The Rest

The other 6 in the collection:

  • web_scraper.py — BeautifulSoup template with pagination and delay
  • email_sender.py — SMTP email (Gmail, Outlook, any provider)
  • pdf_merger.py — Merge or split PDFs
  • dir_monitor.py — Watch a folder, trigger actions on changes
  • excel_to_json.py — Excel/CSV → clean JSON
  • image_resizer.py — Batch resize and compress images

If you want to use them as a bundle (all 10 scripts + README), I packaged them here: Python Automation Scripts Pack ($19). Or drop a comment with which one you want and I'll paste the code.


What scripts do you find yourself rewriting constantly? Always looking for what to add next.

Top comments (0)