DEV Community

Art Baker

Posted on

10 Python Automation Scripts That Actually Save Time (with code)

Every developer accumulates automation scripts over time. I finally collected my 10 most-reached-for ones, cleaned them up, and want to walk through what they do and how they work.

All are standalone (minimal dependencies), under 100 lines each, and have been used in real projects.


1. Bulk File Renamer

Rename hundreds of files in seconds using regex patterns. Great for photo organizing, log file management, and build artifact cleanup.

import re, pathlib

def bulk_rename(directory, pattern, replacement, dry_run=True):
    """Rename files in *directory* whose names match a regex.

    Args:
        directory: Directory to scan (non-recursive).
        pattern: Regex applied to each file name with re.sub.
        replacement: Replacement text (backrefs like r'\\1' allowed).
        dry_run: When True (default), only report planned renames.

    Returns:
        List of (old_name, new_name) tuples for files that would change.

    Raises:
        FileExistsError: if a rename would clobber an existing file.
    """
    path = pathlib.Path(directory)
    renamed = []
    for f in path.iterdir():
        if not f.is_file():
            continue
        new_name = re.sub(pattern, replacement, f.name)
        if new_name == f.name:
            continue
        target = f.parent / new_name
        if not dry_run:
            # Path.rename silently overwrites an existing target on POSIX;
            # fail loudly instead of losing data.
            if target.exists():
                raise FileExistsError(f"refusing to overwrite {target}")
            f.rename(target)
        renamed.append((f.name, new_name))
    return renamed
Enter fullscreen mode Exit fullscreen mode

Run with dry_run=True first to preview changes.


2. CSV/Excel Merger

Combines multiple spreadsheets with smart column matching — handles mismatched column orders automatically.

import pandas as pd, glob

def merge_spreadsheets(directory, output_file):
    """Concatenate every CSV/XLSX in *directory* into a single CSV.

    pd.concat aligns columns by name, so differing column orders across
    input files are handled automatically.

    Args:
        directory: Directory containing the input spreadsheets.
        output_file: Path for the merged CSV.

    Returns:
        Number of rows written (0 if there were no input files;
        nothing is written in that case).
    """
    import os  # local: only needed to compare paths below

    # Sort for a deterministic row order (glob order is filesystem-dependent).
    files = sorted(glob.glob(f"{directory}/*.csv") + glob.glob(f"{directory}/*.xlsx"))
    out_path = os.path.abspath(output_file)
    dfs = []
    for f in files:
        # Skip the output file itself so a rerun doesn't re-ingest
        # the previous merge result.
        if os.path.abspath(f) == out_path:
            continue
        df = pd.read_csv(f) if f.endswith('.csv') else pd.read_excel(f)
        dfs.append(df)
    if not dfs:
        # pd.concat([]) raises ValueError; an empty directory shouldn't crash.
        return 0
    merged = pd.concat(dfs, ignore_index=True, sort=False)
    merged.to_csv(output_file, index=False)
    return len(merged)
Enter fullscreen mode Exit fullscreen mode

3. Email Sender with Attachments

Programmatic email via Gmail SMTP. Handles HTML content and multiple attachments.

import smtplib, os
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders

def send_email(sender, password, recipient, subject, body, attachments=None):
    """Send an HTML email with optional attachments via Gmail SMTP (SSL).

    Args:
        sender: Gmail address to send from (also the SMTP login user).
        password: Account password, or an App Password if 2FA is enabled.
        recipient: Destination email address.
        subject: Subject line.
        body: HTML body content.
        attachments: Optional iterable of file paths to attach.
            (Default changed from a mutable [] — shared across calls — to None.)
    """
    msg = MIMEMultipart()
    msg['From'] = sender
    msg['To'] = recipient
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'html'))

    for filepath in attachments or ():
        with open(filepath, 'rb') as f:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(f.read())
        encoders.encode_base64(part)
        part.add_header('Content-Disposition',
                        f'attachment; filename={os.path.basename(filepath)}')
        msg.attach(part)

    # SMTP_SSL on 465 encrypts from the first byte (no STARTTLS needed).
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender, password)
        server.send_message(msg)
Enter fullscreen mode Exit fullscreen mode

Use an App Password if you have 2FA enabled on Gmail.


4. Web Scraper Template

BeautifulSoup + requests starter with rate limiting and retry logic built in.

import requests, time, random
from bs4 import BeautifulSoup

def scrape_page(url, delay_range=(1, 3)):
    """Fetch *url* and return it parsed as a BeautifulSoup document.

    Sleeps a random interval first (politeness toward the server), sends a
    desktop User-Agent, and retries failed requests up to three times with
    exponential backoff. Re-raises the last requests.RequestException if
    every attempt fails.
    """
    time.sleep(random.uniform(*delay_range))  # Be polite
    ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

    attempt = 0
    while True:
        try:
            response = requests.get(url, headers=ua, timeout=10)
            response.raise_for_status()
            return BeautifulSoup(response.text, 'html.parser')
        except requests.RequestException:
            if attempt >= 2:
                raise  # third failure: give up and surface the error
            time.sleep(2 ** attempt)  # backoff: 1s, then 2s
            attempt += 1
Enter fullscreen mode Exit fullscreen mode

5. PDF Text Extractor

Extract text from PDFs for downstream processing. Handles multi-page documents.

import PyPDF2

def extract_pdf_text(filepath, pages=None):
    """Extract plain text from a PDF.

    Args:
        filepath: Path to the PDF file.
        pages: Optional iterable of zero-based page indices;
            defaults to every page in the document.

    Returns:
        The extracted text, with pages separated by blank lines.
    """
    with open(filepath, 'rb') as handle:
        reader = PyPDF2.PdfReader(handle)
        wanted = range(len(reader.pages)) if pages is None else pages
        # Extract while the file handle is still open.
        chunks = [reader.pages[i].extract_text() for i in wanted]
    return '\n\n'.join(chunks)
Enter fullscreen mode Exit fullscreen mode

6. Directory Watcher

Trigger custom actions when files are added, modified, or deleted. Uses watchdog.

from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import time

class FileHandler(FileSystemEventHandler):
    """Default watchdog handler: logs each newly created file."""

    # Called by the watchdog observer for every create event under the watched path.
    def on_created(self, event):
        # Directories also emit create events; only react to files.
        if not event.is_directory:
            print(f"New file: {event.src_path}")
            # processing logic here

def watch_directory(path, handler=None):
    """Watch *path* (non-recursive) and dispatch filesystem events.

    Falls back to FileHandler when no handler is given. Blocks forever,
    polling once per second, until interrupted with Ctrl-C; then shuts the
    observer down cleanly.
    """
    event_handler = handler or FileHandler()
    observer = Observer()
    observer.schedule(event_handler, path, recursive=False)
    observer.start()
    try:
        # The observer runs on its own thread; this loop just keeps the
        # main thread alive so Ctrl-C can stop everything.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
Enter fullscreen mode Exit fullscreen mode

7. JSON/CSV Converter

Bidirectional conversion with nested JSON flattening — handles deeply nested structures.

import json, csv

def flatten(d, parent_key='', sep='.'):
    """Collapse a nested dict into a flat dict with joined keys.

    {'a': {'b': 1}} becomes {'a.b': 1}. Non-dict values (including
    lists) are kept unchanged.

    Args:
        d: The (possibly nested) dict to flatten.
        parent_key: Key prefix accumulated during recursion.
        sep: Separator joining nested key segments.
    """
    flat = {}
    for key, value in d.items():
        full_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten(value, full_key, sep))
        else:
            flat[full_key] = value
    return flat

def json_to_csv(json_file, csv_file):
    """Flatten a JSON file (one object or an array of objects) into a CSV.

    Nested objects become dotted column names via flatten(). The sorted
    union of all keys forms the header; rows missing a key are left blank
    (DictWriter's restval default).
    """
    # Explicit UTF-8 on both files: the platform's default locale encoding
    # would otherwise make the output machine-dependent.
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)
    if isinstance(data, dict):
        data = [data]  # treat a single object as a one-row table
    flat = [flatten(item) for item in data]
    keys = sorted({k for d in flat for k in d})
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        writer.writerows(flat)
Enter fullscreen mode Exit fullscreen mode

8. Duplicate File Finder

Finds (and optionally removes) duplicate files using MD5 hashing.

import hashlib, pathlib
from collections import defaultdict

def find_duplicates(directory, remove=False):
    """Group files under *directory* (recursive) by MD5 content hash.

    Args:
        directory: Root directory to scan.
        remove: When True, delete all but the first file in each
            duplicate group.

    Returns:
        Dict mapping hex digest -> list of Paths, only for hashes shared
        by more than one file.
    """
    hashes = defaultdict(list)

    # Sort so the "kept" file in each group is deterministic across runs
    # (rglob order is filesystem-dependent).
    for filepath in sorted(pathlib.Path(directory).rglob('*')):
        if not filepath.is_file():
            continue
        digest = hashlib.md5()
        # Hash in 1 MiB chunks instead of read_bytes() so large files
        # never have to fit in memory at once.
        with filepath.open('rb') as fh:
            for chunk in iter(lambda: fh.read(1 << 20), b''):
                digest.update(chunk)
        hashes[digest.hexdigest()].append(filepath)

    duplicates = {h: paths for h, paths in hashes.items() if len(paths) > 1}

    if remove:
        for paths in duplicates.values():
            for path in paths[1:]:
                path.unlink()

    return duplicates
Enter fullscreen mode Exit fullscreen mode

9. Scheduled Task Runner

Lightweight cron-style scheduler built on the `schedule` library.

import schedule, time, threading

def run_scheduler(jobs):
    """Run *jobs* on fixed intervals, forever (blocking).

    Each job is a dict: {'interval': int, 'fn': callable} plus an optional
    'unit' key ('seconds', 'minutes', 'hours', ...) — previously the unit
    was hard-coded to hours; it still defaults to 'hours' for backward
    compatibility.

    NOTE: pending jobs are only checked once a minute, so sub-minute
    intervals will fire late.
    """
    for job in jobs:
        every = schedule.every(job['interval'])
        # getattr selects the schedule unit property, e.g. every.hours
        getattr(every, job.get('unit', 'hours')).do(job['fn'])

    while True:
        schedule.run_pending()
        time.sleep(60)

# Example: run the scheduler off the main thread so it doesn't block the
# rest of the program. daemon=True lets the interpreter exit without
# waiting for the (infinite) scheduler loop.
thread = threading.Thread(
    target=run_scheduler,
    args=([{'interval': 1, 'fn': lambda: print('hourly')}],),
    daemon=True
)
thread.start()
Enter fullscreen mode Exit fullscreen mode

10. API Response Cacher

Decorator that caches API responses to disk — avoids repeated calls and rate limit issues.

import json, hashlib, os, time
from functools import wraps

def cache_api(cache_dir='.cache', ttl=3600):
    os.makedirs(cache_dir, exist_ok=True)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            key = hashlib.md5(str(args + tuple(kwargs.items())).encode()).hexdigest()
            cache_file = os.path.join(cache_dir, f'{key}.json')

            if os.path.exists(cache_file) and time.time() - os.path.getmtime(cache_file) < ttl:
                with open(cache_file) as f:
                    return json.load(f)

            result = func(*args, **kwargs)
            with open(cache_file, 'w') as f:
                json.dump(result, f)
            return result
        return wrapper
    return decorator
Enter fullscreen mode Exit fullscreen mode

Wrapping up

These are the 10 I reach for most. Each is small and modifiable for your own projects.

I've packaged all 10 with docs and example usage here if you want the ready-to-use versions: payhip.com/b/Zm4lG ($12).

What automation scripts have saved you the most time?

Top comments (0)