DEV Community

Jacob
Jacob

Posted on

Purge a Glue table from the CLI

Sometimes you want to purge a Glue table: delete all of its files (including every object version) from S3 and remove the table itself. Lately I had to do this often, so I created this small script for it.

It shows a numbered list of the Glue tables in the current AWS account. After you select a number, it lists the objects that would be removed, asks for confirmation, and then deletes the files from S3 and the table from Glue.

import boto3
import argparse
import awswrangler as wr
from pprint import pprint


def get_glue_tables():
    """Build a summary list of the tables yielded by ``wr.catalog.get_tables()``.

    Returns:
        list[dict]: One dict per table with the keys ``"name"``,
        ``"database"`` and ``"path"``. ``"path"`` is the table's storage
        location with its first five characters (the ``s3://`` scheme)
        cut off, i.e. ``bucket/key-prefix``.
    """
    summaries = []
    for entry in wr.catalog.get_tables():
        summary = {}
        summary["name"] = entry["Name"]
        summary["database"] = entry["DatabaseName"]
        # Slice off the leading "s3://" so the path starts at the bucket name.
        summary["path"] = entry["StorageDescriptor"]["Location"][5:]
        summaries.append(summary)
    return summaries


def display_menu(options):
    """Print a numbered menu of tables and return the user's selection.

    Keeps prompting on stdin until a valid number is entered.

    Args:
        options: Sequence of dicts, each with the keys ``"database"``,
            ``"name"`` and ``"path"``.

    Returns:
        int: ``0`` when the user picked "Exit", otherwise the 1-based
        position of the chosen entry (so the entry is ``options[n - 1]``).
    """
    print("Select a table:")
    for i, option in enumerate(options, 1):
        print(f"{i}. {option['database']:30} {option['name']:10} {option['path']}")

    print("0. Exit")

    while True:
        choice = input("Enter the number of your choice: ").strip()
        # isdecimal() rejects characters such as "²" that isdigit() accepts
        # but int() cannot convert.
        if not choice.isdecimal():
            print("Invalid input. Please enter a valid number.")
            continue

        number = int(choice)
        # Entries are numbered 1..len(options), so the upper bound is
        # inclusive; otherwise the last table could never be selected.
        if 0 <= number <= len(options):
            return number
        print("Invalid choice. Please enter a valid number.")


def display_sure():
    """Ask for a yes/no confirmation on stdin until a valid answer is given.

    The answer is compared after stripping whitespace and lower-casing.

    Returns:
        bool: ``True`` for "yes", ``False`` for "no".
    """
    verdicts = {"yes": True, "no": False}
    while True:
        reply = input("Sure to delete? (yes/no): ").strip().lower()
        if reply in verdicts:
            return verdicts[reply]
        print("Invalid input. Please enter 'yes' or 'no.")


def bucket_action(table, delete=False):
    """List or permanently delete every object version under a table's S3 path.

    With ``delete=False`` this is a dry run: each matching object version and
    delete marker is printed. With ``delete=True`` each one is deleted by
    version id, and afterwards the table is removed from the Glue catalog.

    Args:
        table: Dict with the keys ``"path"`` (``bucket/key-prefix``, without
            the ``s3://`` scheme), ``"database"`` and ``"name"``.
        delete: When true, delete instead of just listing.
    """
    # partition() tolerates a path that is only a bucket name (no "/"),
    # where split("/", 1) would fail to unpack.
    bucket_name, _, subfolder = table["path"].partition("/")

    # Terminate the prefix with "/" so that "db/tbl" does not also match
    # sibling prefixes such as "db/tbl_backup/". An empty prefix (table at the
    # bucket root) is left empty.
    # NOTE(review): assumes the table location is a folder-style prefix, not
    # a single object key - confirm for your catalog.
    if subfolder and not subfolder.endswith("/"):
        subfolder += "/"

    s3 = boto3.client("s3")

    def process_object(version):
        """Delete or print one entry from a list_object_versions page."""
        if delete:
            s3.delete_object(
                Bucket=bucket_name,
                Key=version["Key"],
                VersionId=version["VersionId"],
            )
            # Progress indicator; flushed so it shows up while deleting.
            print(".", end="", flush=True)
        else:
            print(
                f"Object key: {version['Key']}, Version ID: {version['VersionId']}"
            )

    # The paginator follows NextKeyMarker / NextVersionIdMarker for us.
    paginator = s3.get_paginator("list_object_versions")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=subfolder):
        # Delete markers are returned separately from object versions; both
        # have to go for the prefix to be truly empty on a versioned bucket.
        for version in page.get("Versions", []) + page.get("DeleteMarkers", []):
            process_object(version)

    if delete:
        print()  # end the line of progress dots
        print("Deleting glue table")
        wr.catalog.delete_table_if_exists(
            database=table["database"], table=table["name"]
        )


# Interactive driver: re-read the table list, let the user pick one, show what
# would be removed, and only delete after an explicit "yes". Choosing 0 ends
# the loop.
keep_going = True
while keep_going:
    tables = get_glue_tables()
    option = display_menu(tables)
    keep_going = option != 0
    if not keep_going:
        continue

    # Menu numbers are 1-based; the list is 0-based.
    selected_table = tables[option - 1]

    # Dry run first so the user sees which objects are affected.
    bucket_action(selected_table)

    confirmed = display_sure()
    if confirmed:
        bucket_action(selected_table, delete=True)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)