DEV Community

Artem Ptushkin
Artem Ptushkin

Posted on

2

GitLab Python-based job to remove stale branches

Problem

GitLab has a concept of stale branches: any branch that was last updated more than 3 months ago is considered stale.

However, it's not possible to remove these branches in bulk or to have more fine-grained control over which of them are removed.

Here is a full example of how you can set this up. To run it on a schedule, you can use GitLab scheduled pipelines.

Code

# Scheduled job that deletes stale branches via clean-stale-branches.py.
# The script reads the API token from the GITLAB_ACCESS_TOKEN CI/CD variable.
remove-stale-branches:
  needs: []
  image: python:slim
  variables:
    # Branches whose last commit is older than this many days are removed.
    THRESHOLD_PERIOD_DAYS: 30
    # Default target: the project this pipeline runs in.
    STALE_PROJECT_NAMESPACES: "$CI_PROJECT_PATH"
  parallel:
    matrix:
      # One job per entry; each entry overrides the variables above.
      - STALE_PROJECT_NAMESPACES: "foo/baz"
        STALE_EXCLUSION_PATTERNS: "example/.*"
  rules:
    # Only run from scheduled pipelines.
    - if: "$CI_PIPELINE_SOURCE == 'schedule'"
      when: always
  script:
    - pip install requests
    # The script's option is --namespace; values are quoted so regex
    # metacharacters in the patterns are never touched by the shell.
    - python3 clean-stale-branches.py --namespace="$STALE_PROJECT_NAMESPACES" --days-threshold="$THRESHOLD_PERIOD_DAYS" --ignore-patterns="$STALE_EXCLUSION_PATTERNS"
view raw .gitlab-ci.yml hosted with ❤ by GitHub
import os
import requests
from urllib.parse import quote
import re
import argparse
from datetime import datetime, timedelta, timezone
def get_arguments():
    """Parse the command-line options of the stale-branch cleanup script.

    Returns:
        argparse.Namespace: with ``days_threshold`` (int, defaults to 30),
        ``namespace`` (str, required) and ``ignore_patterns`` (str or None).
    """
    parser = argparse.ArgumentParser(description="Remove outdated GitLab branches based on specified criteria.")
    # A required option can never fall back to its default, so declaring both
    # was contradictory; the option is now optional and defaults to 30 days.
    parser.add_argument("--days-threshold", type=int, default=30, help="Number of days to consider a branch stale.")
    # --project-namespace is accepted as an alias so that either spelling
    # of the option populates ``args.namespace``.
    parser.add_argument("--namespace", "--project-namespace", dest="namespace", required=True, help="Namespace of the GitLab project.")
    parser.add_argument("--ignore-patterns", help="Comma-separated string of patterns to exclude branches from deletion.")
    return parser.parse_args()
def main():
    """Resolve the requested namespace and clean stale branches in it.

    The namespace is first looked up as a GitLab group; if GitLab answers
    404 it is looked up as a single project instead. Every resolved project
    is passed to ``handle_project``.

    Reads the API token from the ``GITLAB_ACCESS_TOKEN`` environment variable.

    Raises:
        SystemExit: with status 1 when the token is missing or when the
            namespace cannot be resolved as a group or a project.
    """
    args = get_arguments()
    days_limit = args.days_threshold
    project_space = args.namespace
    # safe='' also encodes "/", which the API requires for path-style ids.
    namespace_encoded = quote(project_space, safe='')

    access_token = os.environ.get("GITLAB_ACCESS_TOKEN")
    if access_token is None:
        print("Error: Missing GitLab access token in environment variables.")
        raise SystemExit(1)
    request_headers = {"PRIVATE-TOKEN": access_token}

    # Drop blank entries (e.g. from a trailing comma): an empty pattern
    # matches every branch name and would silently exclude all branches.
    raw_patterns = args.ignore_patterns.split(",") if args.ignore_patterns else []
    ignore_list = [pattern.strip() for pattern in raw_patterns if pattern.strip()]
    ignore_regex = [re.compile(pattern) for pattern in ignore_list]

    group_api_url = f"https://gitlab.com/api/v4/groups/{namespace_encoded}"
    project_api_url = f"https://gitlab.com/api/v4/projects/{namespace_encoded}"

    group_response = requests.get(group_api_url, headers=request_headers)
    if group_response.status_code == 404:
        print(f"Identified {namespace_encoded} as a project")
        project_response = requests.get(project_api_url, headers=request_headers)
        if project_response.status_code != 200:
            print(f"Error: Unable to resolve '{project_space}' as a group or a project. Status Code: {project_response.status_code}")
            raise SystemExit(1)
        project_info = project_response.json()
        handle_project(ignore_regex, project_info['id'], request_headers, days_limit, project_info['name'])
    elif group_response.status_code == 200:
        print(f"Identified {namespace_encoded} as a group, processing all contained projects")
        group_info = group_response.json()
        # NOTE(review): this relies on the ``projects`` list embedded in the
        # group response; confirm it covers all projects for large groups.
        for project in group_info.get('projects', []):
            handle_project(ignore_regex, project['id'], request_headers, days_limit, project['name'])
    else:
        # Auth or server errors must not be mistaken for "this is a group".
        print(f"Error: Unable to look up group '{project_space}'. Status Code: {group_response.status_code}")
        raise SystemExit(1)
def handle_project(ignore_regex, project_id, headers, days_limit, project_name):
    """Delete the stale, unprotected branches of one GitLab project.

    A branch is deleted when its last commit is older than ``days_limit``
    days, its name matches none of ``ignore_regex`` (tested with
    ``re.match``, i.e. anchored at the start of the name), and it is neither
    protected nor the project's default branch.

    Args:
        ignore_regex: Compiled patterns; matching branches are kept.
        project_id: Numeric GitLab project id used in the API URLs.
        headers: HTTP headers carrying the authentication token.
        days_limit: Age in days after which a branch counts as stale.
        project_name: Project name, used only for log output.

    Raises:
        SystemExit: with status 1 when the branch list cannot be fetched.
    """
    print("=========")
    print(f"Working on project {project_name}")

    branches_info = _fetch_all_branches(project_id, headers)
    # Timezone-aware "now" so it compares correctly with aware commit dates.
    stale_date = datetime.now(timezone.utc) - timedelta(days=days_limit)
    stale_branches = [
        branch for branch in branches_info
        if _is_deletable(branch, stale_date, ignore_regex)
    ]

    print(f"Branches not updated in over {days_limit} days, count:", len(stale_branches))
    print(f"Branches to be deleted:\n", [branch['name'] for branch in stale_branches])

    for branch in stale_branches:
        # Branch names may contain "/", which must be encoded in the URL path.
        branch_encoded = quote(branch['name'], safe='')
        delete_api_url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/branches/{branch_encoded}"
        delete_response = requests.delete(delete_api_url, headers=headers)
        if delete_response.status_code == 204:
            print(f"Successfully deleted branch '{branch['name']}'.")
        else:
            print(f"Failed to delete branch '{branch['name']}': Status Code {delete_response.status_code}")


def _fetch_all_branches(project_id, headers):
    """Return all branches of the project, following paginated responses."""
    branches = []
    next_url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/branches?per_page=100"
    while next_url:
        response = requests.get(next_url, headers=headers)
        if response.status_code != 200:
            print(f"Error: Unable to fetch branches. Status Code: {response.status_code}")
            raise SystemExit(1)
        branches.extend(response.json())
        # ``links`` is parsed from the Link response header; no "next"
        # relation means this was the last page.
        next_url = response.links.get("next", {}).get("url")
    return branches


def _is_deletable(branch, stale_date, ignore_regex):
    """Tell whether a branch is stale, not ignored, and safe to delete."""
    committed_at = datetime.fromisoformat(branch['commit']['committed_date'])
    if committed_at.tzinfo is None:
        # Only naive timestamps are assumed to be UTC; aware ones keep their
        # own offset so the comparison is made on the real instant.
        committed_at = committed_at.replace(tzinfo=timezone.utc)
    if committed_at >= stale_date:
        return False
    if any(regex.match(branch['name']) for regex in ignore_regex):
        return False
    # A missing "protected" flag is treated as protected, to stay on the
    # safe side; the default branch is never a deletion candidate.
    if branch.get('protected', True) or branch.get('default', False):
        return False
    return True
# Run the cleanup only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Top comments (0)

AWS Security LIVE!

Join us for AWS Security LIVE!

Discover the future of cloud security. Tune in live for trends, tips, and solutions from AWS and AWS Partners.

Learn More

👋 Kindness is contagious

Please leave a ❤️ or a friendly comment on this post if you found it helpful!

Okay