DEV Community

MEROLINE LIZLENT
MEROLINE LIZLENT

Posted on

GitPython

Have you ever written a deployment script with subprocess.run(["git", "pull"]) and felt that there must be a better way? There is! GitPython gives you a real Python API for almost anything Git can do: cloning, committing, creating branches, diffing, reading history, and more.

Installation

pip install gitpython
Enter fullscreen mode Exit fullscreen mode

GitPython requires the git executable to be installed on your system. Behind the scenes it shells out to git and translates the results into Python objects as cleanly as possible.

Opening a Repository

from git import Repo, InvalidGitRepositoryError, NoSuchPathError

# Open an existing repo
repo = Repo("/path/to/your/project")

# Open the repo at the current working directory
repo = Repo(".")

# Open from any subdirectory (search_parent_directories=True)
repo = Repo(".", search_parent_directories=True)

# Safe opening with error handling.
# Two distinct failures are possible: the path does not exist at all
# (NoSuchPathError) or it exists but is not a git repository
# (InvalidGitRepositoryError). Catch both so a bad path never crashes the script.
try:
    repo = Repo("/some/path")
except (InvalidGitRepositoryError, NoSuchPathError):
    print("Not a git repository")
Enter fullscreen mode Exit fullscreen mode

Repository Basics

from git import Repo

# Open the repository located at the current working directory
repo = Repo(".")

# Basic info
print(repo.working_dir)       # /home/user/myproject
print(repo.git_dir)           # /home/user/myproject/.git
# NOTE(review): called without untracked_files=True, so presumably untracked
# files alone do not make the repo "dirty" here - confirm against the docs
print(repo.is_dirty())        # True if there are uncommitted changes
print(repo.untracked_files)   # ['new_file.py', 'notes.txt']
# NOTE(review): presumably requires HEAD to point at a branch (not detached) - confirm
print(repo.active_branch.name)  # main

# Check if repo has any commits
print(repo.head.is_valid())   # False if repo is empty
Enter fullscreen mode Exit fullscreen mode

Cloning a Repository

from git import Repo

# The four calls below are independent alternatives: they all clone into the
# same destination ("/tmp/myproject"), so run only one of them, or give each
# its own destination path.

# Clone a public repo
repo = Repo.clone_from(
    "https://github.com/user/myproject.git",
    "/tmp/myproject"
)

# Clone a specific branch
repo = Repo.clone_from(
    "https://github.com/user/myproject.git",
    "/tmp/myproject",
    branch="develop"
)

# Clone with depth (shallow clone — faster for CI)
repo = Repo.clone_from(
    "https://github.com/user/myproject.git",
    "/tmp/myproject",
    depth=1
)

# Clone with SSH
repo = Repo.clone_from(
    "git@github.com:user/myproject.git",
    "/tmp/myproject"
)
Enter fullscreen mode Exit fullscreen mode

Staging and Committing

from git import Repo
import os

repo = Repo(".")

# Stage specific files
repo.index.add(["README.md", "src/main.py"])

# Stage all changes, including deletions (like `git add -A`)
repo.git.add(A=True)

# Or using the index directly.
# Deleted files cannot be passed to index.add() (the path no longer exists on
# disk), so they are split out and removed from the index instead.
unstaged = repo.index.diff(None)
changed_files = [item.a_path for item in unstaged if not item.deleted_file]
deleted_files = [item.a_path for item in unstaged if item.deleted_file]
untracked = repo.untracked_files
repo.index.add(changed_files + untracked)
if deleted_files:
    repo.index.remove(deleted_files)

# Commit (the explicit dates are optional; omit them to use the current time)
commit = repo.index.commit(
    "feat: add user authentication module",
    author_date="2024-01-15T10:00:00",
    commit_date="2024-01-15T10:00:00",
)
# Separate the short hash from the message so the output stays readable
print(f"Committed: {commit.hexsha[:7]} {commit.message.strip()}")
Enter fullscreen mode Exit fullscreen mode

Commit with Custom Author

from git import Repo, Actor

repo = Repo(".")

# Author and committer are passed separately, so an automated account
# (here "CI Bot") can record a commit whose author is a different person.
author = Actor("Alice", "alice@example.com")
committer = Actor("CI Bot", "ci@example.com")

# Stage the file, then commit it with the explicit identities above
repo.index.add(["deploy.yaml"])
repo.index.commit(
    "chore: update deployment config",
    author=author,
    committer=committer,
)
Enter fullscreen mode Exit fullscreen mode

Branches

from git import Repo

repo = Repo(".")

# List all local branches
for branch in repo.branches:
    print(branch.name)

# List remote branches (assumes a remote named "origin" exists)
for ref in repo.remotes.origin.refs:
    print(ref.name)

# Create a new branch (this alone does not switch to it)
new_branch = repo.create_head("feature/login")

# Switch to a branch (checkout)
new_branch.checkout()

# Or one-liner: create and switch in one step via the raw git interface
repo.git.checkout("-b", "feature/signup")

# Delete a branch
# NOTE(review): force=True presumably deletes even unmerged branches
# (like `git branch -D`) - confirm before using on real work
repo.delete_head("feature/old-stuff", force=True)

# Check current branch
print(repo.active_branch.name)  # feature/signup (the branch checked out last, above)
Enter fullscreen mode Exit fullscreen mode

Working with Remotes

from git import Repo

repo = Repo(".")

# List remotes
for remote in repo.remotes:
    print(f"{remote.name}: {remote.url}")

# The calls below assume a remote named "origin" exists.

# Fetch
repo.remotes.origin.fetch()

# Pull
repo.remotes.origin.pull()

# Push
repo.remotes.origin.push()

# Push a specific branch (refspec format is <local ref>:<remote ref>)
repo.remotes.origin.push(refspec="feature/login:feature/login")

# Add a new remote
repo.create_remote("upstream", "https://github.com/original/repo.git")

# Remove a remote
repo.delete_remote("upstream")
Enter fullscreen mode Exit fullscreen mode

Reading Commit History

from git import Repo

repo = Repo(".")

# Iterate over commit history: at most the 10 commits returned first for "main"
for commit in repo.iter_commits("main", max_count=10):
    # Short (7-character) hash, author timestamp and author name on one line
    print(f"{commit.hexsha[:7]}  {commit.authored_datetime}  {commit.author.name}")
    # Commit message, indented, with surrounding whitespace removed
    print(f"  {commit.message.strip()}")
    print()
Enter fullscreen mode Exit fullscreen mode
a3f1c22  2024-01-15 10:30:00  Alice
  feat: add payment gateway

7b2d891  2024-01-14 16:45:00  Bob
  fix: correct validation logic
Enter fullscreen mode Exit fullscreen mode

Filter by Author, Path, or Date

from datetime import datetime

# Commits by a specific author
for commit in repo.iter_commits("main", author="Alice"):
    print(commit.message.strip())

# Commits touching a specific file
for commit in repo.iter_commits("main", paths="src/auth.py"):
    print(f"{commit.hexsha[:7]} {commit.message.strip()}")

# Commits since a date.
# Let git do the filtering: the keyword is forwarded as `--since=...`, which
# compares against the committer date. Filtering in Python and stopping at the
# first older commit is unreliable, because history is not guaranteed to be
# sorted by author date (rebases, merges), and dropping the timezone with
# replace(tzinfo=None) compares wall-clock times from different UTC offsets.
since = datetime(2024, 1, 1)
for commit in repo.iter_commits("main", since=since.isoformat()):
    print(commit.message.strip())
Enter fullscreen mode Exit fullscreen mode

Diffs: What Changed?

from git import Repo

repo = Repo(".")

# Diff between working directory and index (unstaged changes)
for diff in repo.index.diff(None):
    print(f"Modified: {diff.a_path}")

# Diff between index and HEAD (staged changes)
for diff in repo.index.diff("HEAD"):
    print(f"Staged: {diff.a_path}")

# Diff between two commits (older -> newer)
commits = list(repo.iter_commits("main", max_count=2))
if len(commits) == 2:  # a repo with a single commit has nothing to compare
    newer, older = commits
    # create_patch=True is required for diff.diff to hold the patch text;
    # without it only metadata (paths, change type) is filled in and
    # diff.diff stays empty.
    diffs = older.diff(newer, create_patch=True)

    for diff in diffs:
        print(f"Changed: {diff.a_path or diff.b_path}")
        if diff.diff:
            # Patch text is bytes; replace undecodable bytes instead of crashing
            print(diff.diff.decode("utf-8", errors="replace"))
Enter fullscreen mode Exit fullscreen mode

Tags

from git import Repo

repo = Repo(".")

# List all tags with the short hash of the commit each one points to
for tag in repo.tags:
    print(f"{tag.name} -> {tag.commit.hexsha[:7]}")

# Create a lightweight tag
repo.create_tag("v1.0.0")

# Create an annotated tag (passing a message makes it annotated).
# It uses a different name from the lightweight tag above: creating a second
# tag called "v1.0.0" would fail because that tag already exists.
repo.create_tag(
    "v1.1.0",
    message="Release version 1.1.0",
    ref="main"
)

# Delete a tag
repo.delete_tag("v0.9.0")

# Push tags to remote
repo.remotes.origin.push(tags=True)
Enter fullscreen mode Exit fullscreen mode

Reading File Contents from Git

You can read a file's contents as of any commit straight from Git's object database, without checking that commit out or touching your working tree:

from git import Repo

repo = Repo(".")

# Read a file at HEAD
# Indexing the commit's tree by path yields the stored file object (blob);
# its data stream returns bytes, hence the explicit decode.
blob = repo.head.commit.tree["README.md"]
content = blob.data_stream.read().decode("utf-8")
print(content)

# Read from a specific commit (looked up by abbreviated hash);
# a nested path can be given in a single lookup
commit = repo.commit("a3f1c22")
blob = commit.tree["src/main.py"]
print(blob.data_stream.read().decode("utf-8"))

# Navigate into subdirectories one level at a time
blob = repo.head.commit.tree["src"]["auth"]["jwt.py"]
print(blob.data_stream.read().decode("utf-8"))
Enter fullscreen mode Exit fullscreen mode

Submodules

from git import Repo

repo = Repo(".")

# List submodules
for submodule in repo.submodules:
    print(f"{submodule.name}: {submodule.url}")

# Add a submodule
# NOTE(review): the positional arguments are presumably (name, path) - confirm
repo.create_submodule("mylib", "libs/mylib", url="https://github.com/user/mylib.git")

# Update all submodules (init=True is passed so uninitialised ones are
# presumably set up first - confirm against the GitPython docs)
for submodule in repo.submodules:
    submodule.update(init=True)
Enter fullscreen mode Exit fullscreen mode

Real-World Patterns

Auto-Commit Changed Files

from git import Repo, Actor
from datetime import datetime

def auto_commit(repo_path: str, message: str = None):
    """Stage and commit every pending change in the repository at *repo_path*.

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
        message: Commit message. When empty or ``None``, a message of the
            form ``auto: update YYYY-MM-DD HH:MM`` is generated from the
            current local time.

    Returns:
        The newly created commit, or ``None`` when there was nothing to commit.
    """
    repository = Repo(repo_path)
    has_changes = repository.is_dirty(untracked_files=True)

    if has_changes:
        # Stage everything before committing
        repository.git.add(A=True)

        if message:
            commit_message = message
        else:
            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
            commit_message = f"auto: update {timestamp}"

        bot = Actor("AutoBot", "bot@example.com")
        new_commit = repository.index.commit(commit_message, author=bot)
        print(f"Committed: {new_commit.hexsha[:7]}")
        return new_commit

    print("Nothing to commit")
    return None

auto_commit(".", "chore: automated sync")
Enter fullscreen mode Exit fullscreen mode

Generate a Changelog

from git import Repo
from collections import defaultdict

def generate_changelog(repo_path: str, from_tag: str, to_tag: str = "HEAD") -> str:
    """Build a Markdown changelog for the commits in ``from_tag..to_tag``.

    Commits are grouped by the prefix of their subject line: ``feat`` goes to
    "Features", ``fix`` to "Bug Fixes", ``chore``/``ci`` to "Maintenance" and
    everything else to "Other".

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
        from_tag: Start of the revision range.
        to_tag: End of the revision range; defaults to ``"HEAD"``.

    Returns:
        The changelog as a single newline-joined string.
    """
    repo = Repo(repo_path)

    categories = defaultdict(list)
    # Iterate lazily; there is no need to materialise the commit list first
    for commit in repo.iter_commits(f"{from_tag}..{to_tag}"):
        # Only the first line (the subject) of the message is used
        msg = commit.message.strip().split("\n")[0]
        if msg.startswith("feat"):
            categories["Features"].append(msg)
        elif msg.startswith("fix"):
            categories["Bug Fixes"].append(msg)
        elif msg.startswith(("chore", "ci")):
            categories["Maintenance"].append(msg)
        else:
            categories["Other"].append(msg)

    # Separate the two refs so the header does not read "v1.0.0v1.1.0"
    lines = [f"# Changelog: {from_tag}..{to_tag}\n"]
    for category, items in categories.items():
        lines.append(f"\n## {category}")
        lines.extend(f"- {item}" for item in items)

    return "\n".join(lines)

print(generate_changelog(".", "v1.0.0", "v1.1.0"))
Enter fullscreen mode Exit fullscreen mode

Find Who Last Modified a Line (git blame)

from git import Repo

def blame_file(repo_path: str, file_path: str):
    """Print, for every line of *file_path* at HEAD, who last changed it.

    Each output line holds the short commit hash, the author name padded to
    20 columns, and the line's text.

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
        file_path: Path of the file to blame, relative to the repository.
    """
    repo = Repo(repo_path)
    blame = repo.blame("HEAD", file_path)

    for commit, lines in blame:
        for line in lines or []:
            # Blame lines normally arrive as str (already decoded, trailing
            # whitespace removed); bytes appear only when a line could not be
            # decoded. Calling .decode() unconditionally fails on str.
            if isinstance(line, bytes):
                text = line.decode("utf-8", errors="replace")
            else:
                text = line
            text = text.rstrip("\r\n")
            print(f"{commit.hexsha[:7]}  {commit.author.name:<20}  {text}")

blame_file(".", "src/auth.py")
Enter fullscreen mode Exit fullscreen mode

Check if Branch Is Behind Remote

from git import Repo

def check_sync_status(repo_path: str):
    """Report how far the active branch is ahead of / behind its tracking branch.

    Fetches from the ``origin`` remote first, then counts the commits that
    exist on only one side. Prints a notice and returns early when the active
    branch has no tracking branch.

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
    """
    repository = Repo(repo_path)
    # Refresh remote refs so the comparison uses up-to-date data
    repository.remotes.origin.fetch()

    local = repository.active_branch
    upstream = local.tracking_branch()

    if upstream is not None:
        # "A..B" selects the commits reachable from B but not from A
        ahead_count = sum(1 for _ in repository.iter_commits(f"{upstream}..{local}"))
        behind_count = sum(1 for _ in repository.iter_commits(f"{local}..{upstream}"))

        print(f"Branch '{local.name}':")
        print(f"  Ahead by:  {ahead_count} commit(s)")
        print(f"  Behind by: {behind_count} commit(s)")
    else:
        print("Branch has no remote tracking")

check_sync_status(".")
Enter fullscreen mode Exit fullscreen mode

Using the Raw Git Interface

For Git commands that GitPython's object API does not expose, call the command as a method on repo.git, passing its arguments as strings (dashes in the command name become underscores, e.g. cherry_pick):

from git import Repo

repo = Repo(".")

# Any git command as a method call; the command's output is returned as a string
output = repo.git.log("--oneline", "-5")
print(output)

# git stash
# `git stash save` is deprecated in favour of `git stash push -m <message>`
repo.git.stash("push", "-m", "work in progress")
repo.git.stash("pop")

# git cherry-pick (underscores in the method name map to dashes in the command)
repo.git.cherry_pick("a3f1c22")

# git rebase
repo.git.rebase("main")
Enter fullscreen mode Exit fullscreen mode

Summarizing

GitPython elevates Git from a command-line tool you script through the shell to a fully fledged Python API. Whether you are writing deployment tools, code analysis apps, changelog generators, or scripts that massage repository data, GitPython has you covered. No more slicing and parsing subprocess output.

It is ideally suited to automation: anything you would normally do by hand with a series of git commands can be turned into a neat, testable Python function.

Top comments (0)