Python Incremental Backup: Only Copy Changed Files Automatically
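Full backups copy everything every time. Incremental backups copy only what changed — often around 10x faster when most files are untouched. (This script still reads every file to hash it; the time saved comes from skipping the copy.)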
The Script
import os, shutil, hashlib, json
from pathlib import Path
def file_hash(path):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and consumed in 4096-byte pieces, so
    the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        while True:
            piece = stream.read(4096)
            if not piece:
                # An empty read means end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
class IncrementalBackup:
    """Copy files from a source tree into a backup tree, skipping unchanged ones.

    A JSON manifest stored as ``.manifest.json`` in the destination root maps
    each file's path (relative to the source root) to the content hash that
    was recorded when the file was last copied.
    """

    def __init__(self, src, dst):
        """Remember the two roots and load the existing manifest, if any.

        Args:
            src: Root directory to back up.
            dst: Root directory that receives the copies and the manifest.
        """
        self.src = Path(src)
        self.dst = Path(dst)
        self.manifest = {}
        mf = self.dst / '.manifest.json'
        if mf.exists():
            try:
                loaded = json.loads(mf.read_text(encoding='utf-8'))
            except ValueError:
                # Covers json.JSONDecodeError and UnicodeDecodeError: a
                # damaged manifest must not block every future run. Starting
                # empty only costs a full re-copy.
                print(f'Warning: ignoring unreadable manifest {mf}')
                loaded = None
            if isinstance(loaded, dict):
                self.manifest = loaded

    def _save_manifest(self):
        """Write the manifest to the destination root atomically."""
        self.dst.mkdir(parents=True, exist_ok=True)
        mf = self.dst / '.manifest.json'
        tmp = mf.with_name(mf.name + '.tmp')
        tmp.write_text(json.dumps(self.manifest), encoding='utf-8')
        # Rename over the old file so a crash never leaves a half-written
        # manifest behind.
        os.replace(tmp, mf)

    def run(self):
        """Copy every new, changed, or missing-from-backup file and report counts.

        A file is skipped only when its current hash equals the manifest entry
        AND its copy still exists in the destination. The manifest is saved
        even if the run is interrupted by an error, so files that were already
        copied are not copied again next time.
        """
        copied, skipped = 0, 0
        try:
            for filepath in self.src.rglob('*'):
                if not filepath.is_file():
                    continue
                rel = str(filepath.relative_to(self.src))
                dest = self.dst / rel
                h = file_hash(filepath)
                if self.manifest.get(rel) == h and dest.is_file():
                    skipped += 1
                    continue
                dest.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(filepath, dest)
                # Record the hash only after the copy succeeded, so a failed
                # copy is retried on the next run.
                self.manifest[rel] = h
                copied += 1
        finally:
            self._save_manifest()
        print(f'Backup: {copied} copied, {skipped} skipped')
# Usage: back up the documents folder, copying only new or changed files.
source_dir = '/home/user/documents'
backup_dir = '/backup/documents'
backup = IncrementalBackup(source_dir, backup_dir)
backup.run()
Schedule with Cron
0 * * * * python3 /path/to/backup.py
Want More Scripts?
Get the Python Business Automation Toolkit — 50 scripts, $9 one-time.
What backup strategy do you use?
Top comments (0)