If you've ever manually renamed 500 files or sorted a messy Downloads folder, this is for you.
Bulk Rename with Regex
import os, re

def bulk_rename(directory, pattern, replacement):
    """Rename every file in *directory* by applying re.sub(pattern, replacement).

    Files whose name is unchanged by the substitution are left alone.
    A rename is skipped (with a warning) if the target name already
    exists, so one file is never silently overwritten by another.
    Prints each rename and a final count.
    """
    renamed = 0
    for filename in os.listdir(directory):
        new_name = re.sub(pattern, replacement, filename)
        if new_name == filename:
            continue
        target = os.path.join(directory, new_name)
        # os.rename silently replaces an existing file on POSIX --
        # refuse to clobber instead.
        if os.path.exists(target):
            print(f"  skipping {filename}: {new_name} already exists")
            continue
        os.rename(os.path.join(directory, filename), target)
        # Bug fix: original printed a literal "(unknown)" placeholder
        # instead of the source filename.
        print(f"  {filename} -> {new_name}")
        renamed += 1
    print(f"Renamed {renamed} files")

# Examples:
# Remove spaces: bulk_rename("./photos", r"\s+", "_")
# Add prefix: bulk_rename("./docs", r"^", "2024_")
# Fix extensions: bulk_rename("./data", r"\.jpeg$", ".jpg")
Organize Files by Extension
import shutil
from pathlib import Path

def organize_by_type(directory):
    """Sort the files directly inside *directory* into subfolders by extension.

    Extension matching is case-insensitive; anything not listed in the
    type map goes to "Other". Subfolders are created on demand and
    existing entries that are not regular files are skipped.
    """
    type_map = {
        "Images": [".jpg", ".jpeg", ".png", ".gif", ".svg"],
        "Documents": [".pdf", ".doc", ".docx", ".txt", ".md"],
        "Data": [".csv", ".json", ".xlsx", ".xml"],
        "Code": [".py", ".js", ".html", ".css"],
    }
    root = Path(directory)
    # Snapshot the listing before moving anything: we create subfolders
    # and move files inside *directory* during the loop, and mutating a
    # directory while lazily iterating it can skip or repeat entries.
    for file in list(root.iterdir()):
        if not file.is_file():
            continue
        ext = file.suffix.lower()
        # First category whose extension list contains ext, else "Other".
        folder = next(
            (name for name, extensions in type_map.items() if ext in extensions),
            "Other",
        )
        dest = root / folder
        dest.mkdir(exist_ok=True)
        shutil.move(str(file), str(dest / file.name))
        print(f"  {file.name} -> {folder}/")
Find and Remove Duplicates
import hashlib
from collections import defaultdict

def find_duplicates(directory):
    """Find byte-identical files under *directory* (recursive) by MD5 hash.

    Returns a dict mapping hex digest -> list of paths for every group
    with more than one copy. Unreadable files are skipped. Prints each
    duplicate group and an estimate of reclaimable space.

    MD5 is fine here: it is used for content grouping, not security.
    """
    hashes = defaultdict(list)
    for root, _, files in os.walk(directory):
        for f in files:
            path = os.path.join(root, f)
            try:
                # Hash in chunks under a context manager: the original
                # leaked the file handle and read whole files into memory.
                digest = hashlib.md5()
                with open(path, "rb") as fh:
                    for chunk in iter(lambda: fh.read(1 << 16), b""):
                        digest.update(chunk)
                hashes[digest.hexdigest()].append(path)
            except OSError:
                # PermissionError is a subclass of OSError, so one
                # except clause covers both; skip unreadable files.
                continue
    dupes = {h: paths for h, paths in hashes.items() if len(paths) > 1}
    total_saved = 0
    for h, paths in dupes.items():
        size = os.path.getsize(paths[0])
        print(f"  Duplicate ({size/1024:.0f}KB): {len(paths)} copies")
        for p in paths:
            print(f"    {p}")
        # Keeping one copy per group frees the size of the others.
        total_saved += size * (len(paths) - 1)
    print(f"\nPotential space saved: {total_saved/1024/1024:.1f} MB")
    return dupes
Watch a Folder for New Files
import time

def watch_folder(directory, callback, interval=2):
    """Poll *directory* and invoke callback(path) for each newly appearing name.

    Takes an initial snapshot of the directory listing, then loops
    forever: every *interval* seconds it diffs the current listing
    against the previous one and fires *callback* with the full path of
    each new entry. Removed names drop out of the snapshot, so a file
    that is deleted and re-created fires again. Never returns.
    """
    seen = set(os.listdir(directory))
    print(f"Watching {directory}...")
    while True:
        snapshot = set(os.listdir(directory))
        for f in snapshot - seen:
            path = os.path.join(directory, f)
            print(f"  New file: {f}")
            callback(path)
        seen = snapshot
        time.sleep(interval)

# Usage: watch_folder("./inbox", lambda f: print(f"Processing {f}"))
These are 4 of the 10 scripts in my Python Automation Toolkit. The full package includes web scraping, email automation, API caching, PDF processing, and a task scheduler.
Each script is under 100 lines, standalone, and well-documented. $12 for the complete set.
Top comments (0)