DEV Community

Art Baker
Art Baker

Posted on

Python File Automation: Rename, Organize, and Process Files in Bulk

If you've ever manually renamed 500 files or sorted a messy Downloads folder, this is for you.

Bulk Rename with Regex

import os, re

def bulk_rename(directory, pattern, replacement):
    """Rename every file in *directory* whose name matches *pattern*.

    Applies ``re.sub(pattern, replacement, filename)`` to each entry and
    renames the file when the result differs.

    Args:
        directory: path to the folder whose files are renamed (not recursive).
        pattern: regex applied to each filename.
        replacement: replacement string (may use backreferences).

    Returns:
        Number of files actually renamed.
    """
    renamed = 0
    for filename in os.listdir(directory):
        new_name = re.sub(pattern, replacement, filename)
        if new_name == filename:
            continue
        src = os.path.join(directory, filename)
        dst = os.path.join(directory, new_name)
        # Guard: on POSIX, os.rename silently overwrites an existing target.
        # Two files can map to the same new name (e.g. "a b" and "a  b"
        # with r"\s+" -> "_"), so skip rather than destroy data.
        if os.path.exists(dst):
            print(f"  Skipping {filename}: {new_name} already exists")
            continue
        os.rename(src, dst)
        print(f"  {filename} -> {new_name}")
        renamed += 1
    print(f"Renamed {renamed} files")
    return renamed

# Examples:
# Remove spaces: bulk_rename("./photos", r"\s+", "_")
# Add prefix: bulk_rename("./docs", r"^", "2024_")
# Fix extensions: bulk_rename("./data", r"\.jpeg$", ".jpg")
Enter fullscreen mode Exit fullscreen mode

Organize Files by Extension

import shutil
from pathlib import Path

def organize_by_type(directory):
    """Sort the files directly inside *directory* into category subfolders.

    Each file is moved into Images/, Documents/, Data/, Code/, or Other/
    based on its extension (case-insensitive). Subdirectories are left in
    place; category folders are created on demand.

    Args:
        directory: path to the folder to organize (not recursive).
    """
    type_map = {
        "Images": [".jpg", ".jpeg", ".png", ".gif", ".svg"],
        "Documents": [".pdf", ".doc", ".docx", ".txt", ".md"],
        "Data": [".csv", ".json", ".xlsx", ".xml"],
        "Code": [".py", ".js", ".html", ".css"],
    }
    # Invert once to extension -> folder, instead of scanning every
    # category's extension list for every file.
    folder_for = {ext: name for name, exts in type_map.items() for ext in exts}

    root = Path(directory)
    for file in root.iterdir():
        if not file.is_file():
            continue  # leave subdirectories (incl. category folders) alone
        folder = folder_for.get(file.suffix.lower(), "Other")
        dest = root / folder
        dest.mkdir(exist_ok=True)
        shutil.move(str(file), str(dest / file.name))
        print(f"  {file.name} -> {folder}/")
Enter fullscreen mode Exit fullscreen mode

Find and Remove Duplicates

import hashlib
from collections import defaultdict

def find_duplicates(directory):
    """Recursively find files with identical content under *directory*.

    Files are grouped by an MD5 digest of their contents (MD5 is fine
    here: this is deduplication, not security). Unreadable files are
    skipped. Prints a summary and the potential space savings.

    Args:
        directory: root of the tree to scan.

    Returns:
        Dict mapping content hash -> list of paths, containing only
        groups with more than one file.
    """
    hashes = defaultdict(list)
    for root, _, files in os.walk(directory):
        for name in files:
            path = os.path.join(root, name)
            try:
                digest = hashlib.md5()
                # Hash in chunks under a context manager: the original
                # open(...).read() leaked the handle and loaded whole
                # files into memory.
                with open(path, "rb") as fh:
                    while chunk := fh.read(1 << 20):
                        digest.update(chunk)
                hashes[digest.hexdigest()].append(path)
            except (PermissionError, OSError):
                continue  # best-effort: skip files we can't read

    dupes = {h: paths for h, paths in hashes.items() if len(paths) > 1}
    total_saved = 0
    for h, paths in dupes.items():
        size = os.path.getsize(paths[0])
        print(f"  Duplicate ({size/1024:.0f}KB): {len(paths)} copies")
        for p in paths:
            print(f"    {p}")
        # Keeping one copy frees the space of the remaining n-1.
        total_saved += size * (len(paths) - 1)

    print(f"\nPotential space saved: {total_saved/1024/1024:.1f} MB")
    return dupes
Enter fullscreen mode Exit fullscreen mode

Watch a Folder for New Files

import time

def watch_folder(directory, callback, interval=2, max_checks=None):
    """Poll *directory* and invoke *callback* for each newly appearing entry.

    Takes a snapshot of the directory listing, then re-lists every
    *interval* seconds and calls ``callback(path)`` for every name not
    seen before. Names that disappear and reappear are reported again.

    Args:
        directory: folder to watch (top level only, not recursive).
        callback: called with the full path of each new entry.
        interval: seconds to sleep between polls.
        max_checks: stop after this many polls; None runs forever
            (the original behavior). Added so the watcher is testable
            and can be bounded.
    """
    seen = set(os.listdir(directory))
    print(f"Watching {directory}...")
    checks = 0
    while max_checks is None or checks < max_checks:
        current = set(os.listdir(directory))
        for f in current - seen:
            path = os.path.join(directory, f)
            print(f"  New file: {f}")
            callback(path)
        seen = current
        checks += 1
        time.sleep(interval)

# Usage: watch_folder("./inbox", lambda f: print(f"Processing {f}"))
Enter fullscreen mode Exit fullscreen mode

These are 4 of the 10 scripts in my Python Automation Toolkit. The full package includes web scraping, email automation, API caching, PDF processing, and a task scheduler.

Each script is under 100 lines, standalone, and well-documented. $12 for the complete set.

Top comments (0)