Forem

Artem Ptushkin
Artem Ptushkin

Posted on

2

GitLab Python-based job to remove stale branches

Problem

GitLab has a concept of stale branches: any branch that has not been updated for more than 3 months is considered stale.

However, it's not possible to remove these branches in bulk or to have more sophisticated control over them.

Here is a full example of how you can set this up. To run it on a schedule, you can use GitLab scheduled pipelines.

Code

# Scheduled job that runs clean-stale-branches.py once per matrix entry.
# The script reads the API token from the GITLAB_ACCESS_TOKEN CI/CD variable.
remove-stale-branches:
  needs: []
  image: python:slim
  variables:
    THRESHOLD_PERIOD_DAYS: 30
    # Default target; each matrix entry below overrides it.
    STALE_PROJECT_NAMESPACES: "$CI_PROJECT_PATH"
  parallel:
    matrix:
      - STALE_PROJECT_NAMESPACES: "foo/baz"
        STALE_EXCLUSION_PATTERNS: "example/.*"
  rules:
    - if: "$CI_PIPELINE_SOURCE == 'schedule'"
      when: always
  script:
    - pip install requests
    # The flag must be --namespace (the name the script's argument parser defines).
    # Expansions are quoted so the shell cannot glob or split the regex patterns.
    - python3 clean-stale-branches.py --namespace="$STALE_PROJECT_NAMESPACES" --days-threshold="$THRESHOLD_PERIOD_DAYS" --ignore-patterns="$STALE_EXCLUSION_PATTERNS"
view raw .gitlab-ci.yml hosted with ❤ by GitHub
import os
import requests
from urllib.parse import quote
import re
import argparse
from datetime import datetime, timedelta, timezone
def get_arguments(argv=None):
    """Parse the command-line options of the stale-branch cleaner.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``days_threshold`` (int),
        ``namespace`` (str) and ``ignore_patterns`` (str or ``None``).

    Raises:
        SystemExit: raised by argparse when the arguments are invalid.
    """
    parser = argparse.ArgumentParser(
        description="Remove outdated GitLab branches based on specified criteria.",
    )
    # Not marked required: a required option never uses its default, so the
    # original ``required=True, default=30`` combination made the default dead.
    parser.add_argument(
        "--days-threshold",
        type=int,
        default=30,
        help="Number of days to consider a branch stale.",
    )
    # ``--project-namespace`` is accepted as an alias because the CI job that
    # accompanies this script invokes it with that spelling.
    parser.add_argument(
        "--namespace",
        "--project-namespace",
        dest="namespace",
        required=True,
        help="Namespace of the GitLab project.",
    )
    parser.add_argument(
        "--ignore-patterns",
        help="Comma-separated string of patterns to exclude branches from deletion.",
    )
    args = parser.parse_args(argv)
    # A negative threshold would put the cutoff in the future and mark every
    # unprotected branch as stale, so refuse it up front.
    if args.days_threshold < 0:
        parser.error("--days-threshold must not be negative")
    return args
def main():
    """Delete stale branches for the project or group given on the command line.

    Reads the GitLab token from the ``GITLAB_ACCESS_TOKEN`` environment
    variable and compiles the ignore patterns. It then asks the groups API
    whether the namespace is a group: if so, every project directly in that
    group is processed; on a 404 the namespace is treated as a single project.

    Raises:
        SystemExit: with status 1 when the token is missing or the namespace
            cannot be resolved through the GitLab API.
    """
    args = get_arguments()
    days_limit = args.days_threshold
    namespace_encoded = quote(args.namespace, safe='')

    access_token = os.environ.get("GITLAB_ACCESS_TOKEN")
    # An empty token is as useless as a missing one.
    if not access_token:
        print("Error: Missing GitLab access token in environment variables.")
        raise SystemExit(1)
    request_headers = {"PRIVATE-TOKEN": access_token}

    # Drop empty entries (e.g. from a trailing comma): an empty pattern would
    # compile to a regex that matches every branch name.
    raw_patterns = args.ignore_patterns.split(",") if args.ignore_patterns else []
    ignore_regex = [re.compile(pattern.strip()) for pattern in raw_patterns if pattern.strip()]

    group_api_url = f"https://gitlab.com/api/v4/groups/{namespace_encoded}"
    project_api_url = f"https://gitlab.com/api/v4/projects/{namespace_encoded}"
    group_response = requests.get(group_api_url, headers=request_headers, timeout=30)

    if group_response.status_code == 200:
        print(f"Identified {namespace_encoded} as a group, processing all contained projects")
        for project in _list_group_projects(group_api_url, request_headers):
            handle_project(ignore_regex, project['id'], request_headers, days_limit, project['name'])
    elif group_response.status_code == 404:
        print(f"Identified {namespace_encoded} as a project")
        project_response = requests.get(project_api_url, headers=request_headers, timeout=30)
        if project_response.status_code != 200:
            print(f"Error: Unable to fetch project {namespace_encoded}. Status Code: {project_response.status_code}")
            raise SystemExit(1)
        project_info = project_response.json()
        handle_project(ignore_regex, project_info['id'], request_headers, days_limit, project_info['name'])
    else:
        # Auth failures, rate limits, server errors: none of these mean
        # "this is a group", so stop instead of reading a bogus payload.
        print(f"Error: Unable to look up {namespace_encoded}. Status Code: {group_response.status_code}")
        raise SystemExit(1)


def _list_group_projects(group_api_url, headers):
    """Return all projects directly owned by a group, across all result pages."""
    page_size = 100
    projects = []
    page = 1
    while True:
        response = requests.get(
            f"{group_api_url}/projects",
            headers=headers,
            # with_shared=false keeps the result to the group's own projects,
            # so branches of projects merely shared with it are never touched.
            params={"per_page": page_size, "page": page, "with_shared": "false"},
            timeout=30,
        )
        if response.status_code != 200:
            print(f"Error: Unable to fetch group projects. Status Code: {response.status_code}")
            raise SystemExit(1)
        batch = response.json()
        projects.extend(batch)
        # A short page is the last page.
        if len(batch) < page_size:
            return projects
        page += 1
def handle_project(ignore_regex, project_id, headers, days_limit, project_name):
    """Delete the stale, unprotected branches of one GitLab project.

    A branch is deleted when its last commit is older than ``days_limit``
    days, it is neither protected nor the default branch, and its name does
    not match any of the ignore patterns.

    Args:
        ignore_regex: Compiled patterns; a branch whose name matches any of
            them (anchored at the start, via ``match``) is kept.
        project_id: Numeric ID of the project in the GitLab API.
        headers: HTTP headers carrying the authentication token.
        days_limit: Age in days after which a branch counts as stale.
        project_name: Project name, used only for log output.

    Raises:
        SystemExit: with status 1 when the branch list cannot be fetched.
    """
    print("=========")
    print(f"Working on project {project_name}")

    branches_info = _fetch_all_branches(project_id, headers)
    # Timezone-aware "now"; datetime.utcnow() is deprecated and naive.
    stale_date = datetime.now(timezone.utc) - timedelta(days=days_limit)
    stale_branches = [
        branch for branch in branches_info
        if _is_deletable_stale_branch(branch, stale_date, ignore_regex)
    ]

    print(f"Branches not updated in over {days_limit} days, count:", len(stale_branches))
    print("Branches to be deleted:\n", [branch['name'] for branch in stale_branches])

    for branch in stale_branches:
        # Branch names may contain "/" and must be URL-encoded in the path.
        branch_encoded = quote(branch['name'], safe='')
        delete_api_url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/branches/{branch_encoded}"
        delete_response = requests.delete(delete_api_url, headers=headers, timeout=30)
        if delete_response.status_code == 204:
            print(f"Successfully deleted branch '{branch['name']}'.")
        else:
            print(f"Failed to delete branch '{branch['name']}': Status Code {delete_response.status_code}")


def _fetch_all_branches(project_id, headers):
    """Return every branch of the project, across all result pages."""
    page_size = 100
    branches_api_url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/branches"
    branches = []
    page = 1
    while True:
        response = requests.get(
            branches_api_url,
            headers=headers,
            params={"per_page": page_size, "page": page},
            timeout=30,
        )
        if response.status_code != 200:
            print(f"Error: Unable to fetch branches. Status Code: {response.status_code}")
            raise SystemExit(1)
        batch = response.json()
        branches.extend(batch)
        # A short page is the last page.
        if len(batch) < page_size:
            return branches
        page += 1


def _commit_time_utc(branch):
    """Return the branch's last commit time as an aware UTC datetime."""
    raw_date = branch['commit']['committed_date']
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    if raw_date.endswith("Z"):
        raw_date = raw_date[:-1] + "+00:00"
    committed = datetime.fromisoformat(raw_date)
    if committed.tzinfo is None:
        # No offset given: assume UTC.
        return committed.replace(tzinfo=timezone.utc)
    # Convert instead of overwriting tzinfo, so the offset is honoured.
    return committed.astimezone(timezone.utc)


def _is_deletable_stale_branch(branch, stale_date, ignore_regex):
    """Tell whether a branch is old enough and allowed to be deleted."""
    # A missing 'protected' flag is treated as protected, to stay on the safe side.
    if branch.get('protected', True) or branch.get('default', False):
        return False
    if any(regex.match(branch['name']) for regex in ignore_regex):
        return False
    return _commit_time_utc(branch) < stale_date
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Top comments (0)