Have you ever written a deployment script with subprocess.run(["git", "pull"]) and felt that there must be a better way? There is! GitPython gives you a real Python API for almost anything Git can do: cloning, committing, branching, diffing, reading history, and more.
Installation
pip install gitpython
GitPython assumes you have git installed on your system. It shells out to git behind the scenes and translates the results into Python objects as cleanly as possible.
Opening a Repository
from git import Repo, InvalidGitRepositoryError, NoSuchPathError

# Open an existing repo
repo = Repo("/path/to/your/project")

# Open the repo at the current working directory
repo = Repo(".")

# Open from any subdirectory: search_parent_directories=True walks up the
# directory tree until a repository is found
repo = Repo(".", search_parent_directories=True)

# Safe opening with error handling.
# Two distinct failures are possible: the path may not exist at all
# (NoSuchPathError), or it may exist without being a Git repository
# (InvalidGitRepositoryError). Handle both so a typo in the path does not
# escape as an unhandled exception.
try:
    repo = Repo("/some/path")
except NoSuchPathError:
    print("Path does not exist")
except InvalidGitRepositoryError:
    print("Not a git repository")
Repository Basics
from git import Repo

repo = Repo(".")

# Basic info: each value is looked up and printed in turn
work_tree = repo.working_dir
print(work_tree)  # /home/user/myproject

git_directory = repo.git_dir
print(git_directory)  # /home/user/myproject/.git

has_uncommitted_changes = repo.is_dirty()
print(has_uncommitted_changes)  # True if there are uncommitted changes

not_yet_tracked = repo.untracked_files
print(not_yet_tracked)  # ['new_file.py', 'notes.txt']

current_branch = repo.active_branch
print(current_branch.name)  # main

# Check if repo has any commits
head_points_at_commit = repo.head.is_valid()
print(head_points_at_commit)  # False if repo is empty
Cloning a Repository
from git import Repo

# NOTE: the four calls below are alternatives. They all clone into the same
# target directory, so pick the one you need rather than running them in
# sequence.
HTTPS_URL = "https://github.com/user/myproject.git"
SSH_URL = "git@github.com:user/myproject.git"
TARGET_DIR = "/tmp/myproject"

# Clone a public repo
repo = Repo.clone_from(HTTPS_URL, TARGET_DIR)

# Clone a specific branch
repo = Repo.clone_from(HTTPS_URL, TARGET_DIR, branch="develop")

# Clone with depth (shallow clone — faster for CI)
repo = Repo.clone_from(HTTPS_URL, TARGET_DIR, depth=1)

# Clone with SSH
repo = Repo.clone_from(SSH_URL, TARGET_DIR)
Staging and Committing
from git import Repo
import os

repo = Repo(".")

# Stage specific files
repo.index.add(["README.md", "src/main.py"])

# Stage all changes (like `git add -A`)
repo.git.add(A=True)

# Or using the index directly.
# index.diff(None) compares the index with the working tree, so it also
# reports files that were deleted on disk. Those cannot be passed to
# index.add() (the file no longer exists) and must be staged as removals.
unstaged = repo.index.diff(None)
changed_files = [item.a_path for item in unstaged if not item.deleted_file]
deleted_files = [item.a_path for item in unstaged if item.deleted_file]
untracked = repo.untracked_files
if changed_files or untracked:
    repo.index.add(changed_files + untracked)
if deleted_files:
    repo.index.remove(deleted_files)

# Commit (author_date / commit_date are optional; omit them to use "now")
commit = repo.index.commit(
    "feat: add user authentication module",
    author_date="2024-01-15T10:00:00",
    commit_date="2024-01-15T10:00:00",
)
print(f"Committed: {commit.hexsha[:7]} — {commit.message.strip()}")
Commit with Custom Author
from git import Repo, Actor

repo = Repo(".")

# The author wrote the change; the committer is whoever records it in history
author = Actor("Alice", "alice@example.com")
committer = Actor("CI Bot", "ci@example.com")

files_to_stage = ["deploy.yaml"]
repo.index.add(files_to_stage)

commit_message = "chore: update deployment config"
repo.index.commit(commit_message, author=author, committer=committer)
Branches
from git import Repo

repo = Repo(".")

# List all local branches
for local_branch in repo.branches:
    print(local_branch.name)

# List remote branches
origin = repo.remotes.origin
for remote_ref in origin.refs:
    print(remote_ref.name)

# Create a new branch
new_branch = repo.create_head("feature/login")

# Switch to a branch (checkout)
new_branch.checkout()

# Or create and switch in one step through the raw git interface
repo.git.checkout("-b", "feature/signup")

# Delete a branch
repo.delete_head("feature/old-stuff", force=True)

# Check current branch
print(repo.active_branch.name)  # feature/signup (the branch checked out last)
Working with Remotes
from git import Repo

repo = Repo(".")

# List remotes
for configured_remote in repo.remotes:
    print(f"{configured_remote.name}: {configured_remote.url}")

# Fetch
repo.remotes.origin.fetch()

# Pull
repo.remotes.origin.pull()

# Push
repo.remotes.origin.push()

# Push a specific branch (refspec is "<local>:<remote>")
branch_refspec = "feature/login:feature/login"
repo.remotes.origin.push(refspec=branch_refspec)

# Add a new remote
upstream_url = "https://github.com/original/repo.git"
repo.create_remote("upstream", upstream_url)

# Remove a remote
repo.delete_remote("upstream")
Reading Commit History
from git import Repo

repo = Repo(".")

# Walk the ten most recent commits reachable from main, newest first
recent_commits = repo.iter_commits("main", max_count=10)
for entry in recent_commits:
    short_sha = entry.hexsha[:7]
    print(f"{short_sha} {entry.authored_datetime} {entry.author.name}")
    print(f" {entry.message.strip()}")
    print()
a3f1c22 2024-01-15 10:30:00 Alice
feat: add payment gateway
7b2d891 2024-01-14 16:45:00 Bob
fix: correct validation logic
Filter by Author, Path, or Date
from datetime import datetime

# Commits by a specific author
for commit in repo.iter_commits("main", author="Alice"):
    print(commit.message.strip())

# Commits touching a specific file
for commit in repo.iter_commits("main", paths="src/auth.py"):
    print(f"{commit.hexsha[:7]} {commit.message.strip()}")

# Commits since a date.
# Let git do the filtering: extra keyword arguments are forwarded to
# `git rev-list`, so since=... becomes --since=... (git applies it to the
# committer date). Stopping a manual loop at the first "too old" commit is
# unreliable, because history is not guaranteed to be ordered by author date
# (merges, rebases and cherry-picks all break that assumption).
since = datetime(2024, 1, 1)
for commit in repo.iter_commits("main", since=since.isoformat()):
    print(commit.message.strip())
Diffs: What Changed?
from git import Repo

repo = Repo(".")

# Diff between working directory and index (unstaged changes)
for diff in repo.index.diff(None):
    print(f"Modified: {diff.a_path}")

# Diff between index and HEAD (staged changes)
for diff in repo.index.diff("HEAD"):
    print(f"Staged: {diff.a_path}")

# Diff between two commits.
# iter_commits yields newest first, so commits[0] is the newer commit and
# commits[1] its predecessor. A repository with a single commit has nothing
# to compare against, hence the length check.
commits = list(repo.iter_commits("main", max_count=2))
if len(commits) == 2:
    # create_patch=True is required for diff.diff to hold the patch text;
    # without it only the file-level metadata is populated.
    diffs = commits[1].diff(commits[0], create_patch=True)
    for diff in diffs:
        print(f"Changed: {diff.a_path}")
        if diff.diff:
            # Patch bytes are not guaranteed to be valid UTF-8
            print(diff.diff.decode("utf-8", errors="replace"))
Tags
from git import Repo

repo = Repo(".")

# List all tags
for tag in repo.tags:
    print(f"{tag.name} → {tag.commit.hexsha[:7]}")

# Create a lightweight tag.
# It uses a different name from the annotated example below: creating the
# same tag name twice fails unless force=True is passed.
repo.create_tag("v1.0.0-rc1")

# Create an annotated tag (passing a message makes the tag annotated)
repo.create_tag(
    "v1.0.0",
    message="Release version 1.0.0",
    ref="main",
)

# Delete a tag
repo.delete_tag("v0.9.0")

# Push tags to remote
repo.remotes.origin.push(tags=True)
Reading File Contents from Git
You can read file contents from any commit without touching the filesystem:
from git import Repo

repo = Repo(".")

# Read a file at HEAD
head_tree = repo.head.commit.tree
blob = head_tree["README.md"]
content = blob.data_stream.read().decode("utf-8")
print(content)

# Read from a specific commit
commit = repo.commit("a3f1c22")
blob = commit.tree["src/main.py"]
main_source = blob.data_stream.read().decode("utf-8")
print(main_source)

# Navigate into subdirectories one tree level at a time
auth_tree = repo.head.commit.tree["src"]["auth"]
blob = auth_tree["jwt.py"]
jwt_source = blob.data_stream.read().decode("utf-8")
print(jwt_source)
Submodules
from git import Repo

repo = Repo(".")

# List submodules
for module in repo.submodules:
    print(f"{module.name}: {module.url}")

# Add a submodule (name, path inside the repo, clone URL)
mylib_url = "https://github.com/user/mylib.git"
repo.create_submodule("mylib", "libs/mylib", url=mylib_url)

# Update all submodules, initialising any that are not checked out yet
for module in repo.submodules:
    module.update(init=True)
Real-World Patterns
Auto-Commit Changed Files
from git import Repo, Actor
from datetime import datetime
def auto_commit(repo_path: str, message: str = None):
    """Stage and commit every pending change in the repository at *repo_path*.

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
        message: Commit message. When empty or ``None``, a message of the
            form ``auto: update <YYYY-MM-DD HH:MM>`` is generated from the
            current local time.

    Returns:
        The commit created by ``repo.index.commit``, or ``None`` when the
        repository has no modified or untracked files.
    """
    repo = Repo(repo_path)

    has_pending_changes = repo.is_dirty(untracked_files=True)
    if not has_pending_changes:
        print("Nothing to commit")
        return

    # Stage everything
    repo.git.add(A=True)

    if message:
        commit_message = message
    else:
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        commit_message = f"auto: update {timestamp}"

    bot = Actor("AutoBot", "bot@example.com")
    new_commit = repo.index.commit(commit_message, author=bot)
    print(f"Committed: {new_commit.hexsha[:7]}")
    return new_commit


auto_commit(".", "chore: automated sync")
Generate a Changelog
from git import Repo
from collections import defaultdict
def generate_changelog(repo_path: str, from_tag: str, to_tag: str = "HEAD") -> str:
    """Build a Markdown changelog for the commits in ``from_tag..to_tag``.

    The first line of each commit message is sorted into a section by its
    prefix: ``feat`` → Features, ``fix`` → Bug Fixes, ``chore``/``ci`` →
    Maintenance, anything else → Other. Sections appear in the order in
    which their first commit is encountered.

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
        from_tag: Revision that starts the range (excluded by ``..``).
        to_tag: Revision that ends the range; defaults to ``"HEAD"``.

    Returns:
        The changelog as a single newline-joined string.
    """
    # Checked in order; the first matching prefix decides the section
    section_for_prefix = (
        ("feat", "Features"),
        ("fix", "Bug Fixes"),
        ("chore", "Maintenance"),
        ("ci", "Maintenance"),
    )

    repo = Repo(repo_path)
    grouped = defaultdict(list)
    for entry in repo.iter_commits(f"{from_tag}..{to_tag}"):
        subject = entry.message.strip().split("\n")[0]
        section = next(
            (title for prefix, title in section_for_prefix if subject.startswith(prefix)),
            "Other",
        )
        grouped[section].append(subject)

    output = [f"# Changelog: {from_tag} → {to_tag}\n"]
    for section, subjects in grouped.items():
        output.append(f"\n## {section}")
        output.extend(f"- {subject}" for subject in subjects)
    return "\n".join(output)


print(generate_changelog(".", "v1.0.0", "v1.1.0"))
Find Who Last Modified a Line (git blame)
from git import Repo
def blame_file(repo_path: str, file_path: str):
    """Print, for every line of *file_path* at HEAD, the commit that last changed it.

    Each output line has the form ``<short sha> <author name, padded> <line>``.

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
        file_path: Path of the file to blame, relative to the repository root.
    """
    repo = Repo(repo_path)
    # repo.blame returns (commit, lines) pairs: consecutive lines last
    # touched by the same commit are grouped together.
    for commit, lines in repo.blame("HEAD", file_path):
        for line in lines:
            # Lines normally arrive as str; GitPython falls back to raw
            # bytes only for content it could not decode. Handle both, and
            # strip any line ending so that print() emits exactly one.
            if isinstance(line, bytes):
                text = line.decode("utf-8", errors="replace")
            else:
                text = line
            text = text.rstrip("\r\n")
            print(f"{commit.hexsha[:7]} {commit.author.name:<20} {text}")


blame_file(".", "src/auth.py")
Check if Branch Is Behind Remote
from git import Repo
def check_sync_status(repo_path: str):
    """Report how far the active branch is ahead of / behind its tracking branch.

    Fetches from the ``origin`` remote first so the comparison uses current
    remote refs, then prints the two commit counts. Prints a notice and
    returns early when the active branch has no tracking branch.

    Args:
        repo_path: Path handed to ``Repo`` to open the repository.
    """
    repo = Repo(repo_path)
    repo.remotes.origin.fetch()

    local = repo.active_branch
    upstream = local.tracking_branch()
    if upstream is None:
        print("Branch has no remote tracking")
        return

    # "A..B" selects the commits reachable from B but not from A
    ahead_count = sum(1 for _ in repo.iter_commits(f"{upstream}..{local}"))
    behind_count = sum(1 for _ in repo.iter_commits(f"{local}..{upstream}"))

    print(f"Branch '{local.name}':")
    print(f" Ahead by: {ahead_count} commit(s)")
    print(f" Behind by: {behind_count} commit(s)")


check_sync_status(".")
Using the Raw Git Interface
For executing Git commands not exposed through GitPython's API, call repo.git with any git command you want to run:
from git import Repo

repo = Repo(".")

# Any git command as a method call; positional arguments are passed through
# to git, and the command's output is returned as a string
output = repo.git.log("--oneline", "-5")
print(output)

# git stash
# `git stash save` is deprecated in favour of `git stash push -m <message>`
repo.git.stash("push", "-m", "work in progress")
repo.git.stash("pop")

# git cherry-pick (underscores in the method name become dashes)
repo.git.cherry_pick("a3f1c22")

# git rebase
repo.git.rebase("main")
Summarizing
GitPython elevates Git from a shell-scriptable command-line tool to a full-fledged Python API. Writing deployment tools, code analysis apps, changelog generators, or scripts to massage repository data? GitPython has you covered. No more cutting and parsing subprocess output.
Ideally suited to automation: anything you would normally do by hand, with a series of git commands, can be turned into a neat, testable Python function.
Top comments (0)