DEV Community

Brad
Brad

Posted on

Python Incremental Backup: Only Copy Changed Files Automatically

Python Incremental Backup: Only Copy Changed Files Automatically

Full backups copy everything every time. Incremental backups copy only the files that have changed since the last run, which can be up to 10x faster when most files are unchanged.

The Script

import os, shutil, hashlib, json
from pathlib import Path

def file_hash(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and read in chunks of ``chunk_size``
    bytes, so the whole file is never held in memory at once. MD5 is used
    here only as a content fingerprint for change detection.

    Args:
        path: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration; must be positive.
            The digest does not depend on this value.

    Returns:
        The MD5 digest as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b'' immediately, which would silently yield the digest
    # of an empty file; reject such sizes instead of hashing nothing.
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        # iter(callable, sentinel) stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()

class IncrementalBackup:
    """Copy only new or changed files from a source tree into a backup tree.

    A JSON manifest stored in the backup directory maps each file's path
    (relative to ``src``) to the content hash recorded when it was last
    copied. On each run every regular file under ``src`` is hashed and is
    copied only if its hash differs from the manifest entry, it has no
    entry, or its copy is missing from the backup directory.

    Files removed from ``src`` are not removed from the backup or from the
    manifest.

    NOTE: nothing here excludes ``dst`` from the scan, so ``dst`` is expected
    to live outside ``src``.

    Attributes:
        src: Root of the tree to back up, as a ``Path``.
        dst: Root of the backup tree, as a ``Path``.
        manifest: Dict mapping relative path strings to content hashes.
    """

    # File name of the manifest, stored directly inside ``dst``.
    MANIFEST_NAME = '.manifest.json'

    def __init__(self, src, dst):
        """Remember the two roots and load any existing manifest from *dst*.

        Args:
            src: Directory to back up.
            dst: Directory that receives the copies and the manifest.
        """
        self.src = Path(src)
        self.dst = Path(dst)
        self.manifest = self._load_manifest()

    def _load_manifest(self):
        """Return the stored manifest dict, or ``{}`` if absent or unusable."""
        mf = self.dst / self.MANIFEST_NAME
        try:
            data = json.loads(mf.read_text(encoding='utf-8'))
        except (FileNotFoundError, NotADirectoryError):
            # No previous backup: start from an empty manifest.
            return {}
        except ValueError:
            # Corrupt or truncated manifest (JSON and decode errors are both
            # ValueError subclasses). Falling back to an empty manifest makes
            # the next run re-copy everything instead of failing forever.
            return {}
        return data if isinstance(data, dict) else {}

    def _save_manifest(self):
        """Write the manifest into ``dst`` via a temporary file and rename."""
        self.dst.mkdir(parents=True, exist_ok=True)
        mf = self.dst / self.MANIFEST_NAME
        tmp = mf.with_name(mf.name + '.tmp')
        tmp.write_text(json.dumps(self.manifest), encoding='utf-8')
        # Rename over the old manifest so an interrupted write cannot leave a
        # half-written manifest behind.
        tmp.replace(mf)

    def run(self):
        """Back up new or changed files, save the manifest, print a summary.

        Raises:
            OSError: If a file cannot be read, copied, or the manifest
                cannot be written.
        """
        copied, skipped = 0, 0
        for filepath in self.src.rglob('*'):
            if not filepath.is_file():
                continue
            rel = str(filepath.relative_to(self.src))
            dest = self.dst / rel
            h = file_hash(filepath)
            # Skip only when the content is unchanged AND the copy is still
            # present; a matching manifest entry alone does not prove the
            # backup file exists.
            if self.manifest.get(rel) == h and dest.is_file():
                skipped += 1
                continue
            dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(filepath, dest)
            self.manifest[rel] = h
            copied += 1
        self._save_manifest()
        print(f'Backup: {copied} copied, {skipped} skipped')

# Usage: run the backup only when this file is executed as a script (e.g.
# from cron), so importing the module does not start copying files.
if __name__ == '__main__':
    backup = IncrementalBackup('/home/user/documents', '/backup/documents')
    backup.run()
Enter fullscreen mode Exit fullscreen mode

Schedule with Cron

0 * * * * python3 /path/to/backup.py
Enter fullscreen mode Exit fullscreen mode

Want More Scripts?

Get the Python Business Automation Toolkit — 50 scripts, $9 one-time.

What backup strategy do you use?

Top comments (0)