DEV Community

Jacob
Jacob

Posted on

Purge a Glue table from the CLI

Sometimes you want to purge a Glue table from S3, deleting all of its files and every version of them. Lately I had to do this often, so I wrote this small script to automate it.

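It shows a list of the Glue tables in the current AWS account. After you select a table by its number, it lists the object versions stored under the table's S3 location, asks for confirmation, and then deletes those files and removes the table from Glue.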

import boto3
import argparse
import awswrangler as wr
from pprint import pprint


def get_glue_tables():
    """Collect name, database and S3 path of every table in the Glue catalog.

    Returns:
        A list of dicts with the keys ``name``, ``database`` and ``path``.
        ``path`` is the table's storage location with its first five
        characters (the ``s3://`` scheme) cut off, i.e. ``bucket/key/...``.
    """
    scheme_length = 5  # len("s3://")
    found = []
    for entry in wr.catalog.get_tables():
        record = {}
        record["name"] = entry["Name"]
        record["database"] = entry["DatabaseName"]
        location = entry["StorageDescriptor"]["Location"]
        record["path"] = location[scheme_length:]
        found.append(record)
    return found


def display_menu(options):
    """Print a numbered table menu and prompt until a valid choice is made.

    Args:
        options: Sequence of dicts with the keys ``database``, ``name`` and
            ``path``; they are shown as menu entries 1..len(options).

    Returns:
        The chosen 1-based entry number, or 0 when the user picks "Exit".
    """
    print("Select a table:")
    for i, option in enumerate(options, 1):
        print(f"{i}. {option['database']:30} {option['name']:10} {option['path']}")

    print("0. Exit")

    while True:
        choice = input("Enter the number of your choice: ").strip()
        # isdecimal() only accepts characters that int() can parse; isdigit()
        # also accepts e.g. superscript digits, for which int() raises.
        if not choice.isdecimal():
            print("Invalid input. Please enter a valid number.")
            continue
        number = int(choice)
        # Entries are numbered from 1, so len(options) itself is a valid
        # choice (the last table); 0 is the exit entry.
        if 0 <= number <= len(options):
            return number
        print("Invalid choice. Please enter a valid number.")


def display_sure():
    """Ask for confirmation until the user answers "yes" or "no".

    The answer is stripped of surrounding whitespace and compared
    case-insensitively; anything else prints a hint and asks again.

    Returns:
        True when the user typed "yes", False when the user typed "no".
    """
    while True:
        choice = input("Sure to delete? (yes/no): ").strip().lower()
        if choice == "yes":
            return True
        if choice == "no":
            return False
        print("Invalid input. Please enter 'yes' or 'no'.")


def bucket_action(table, delete=False):
    """List, or permanently delete, every object version under a table's path.

    Args:
        table: Dict with the keys ``path`` (``bucket/key-prefix`` without the
            ``s3://`` scheme), ``database`` and ``name``.
        delete: When False (default) each object version and delete marker
            under the prefix is only printed. When True each one is deleted
            by version id, and afterwards the Glue table itself is removed.

    Raises:
        ValueError: If ``table["path"]`` does not start with a bucket name.
    """
    # partition() tolerates a path that is only a bucket name (no "/").
    bucket_name, _, prefix = table["path"].partition("/")
    if not bucket_name:
        raise ValueError(f"No bucket name in table path: {table['path']!r}")

    # Terminate the prefix with "/" so that e.g. "db/foo" cannot also match
    # the unrelated sibling "db/foo_old/...". An empty prefix is left alone.
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    s3 = boto3.client("s3")

    def process_object(version):
        # `version` is one entry from "Versions" or "DeleteMarkers"; both
        # kinds are addressed by Key + VersionId.
        if delete:
            s3.delete_object(
                Bucket=bucket_name,
                Key=version["Key"],
                VersionId=version["VersionId"],
            )
            print(".", end="")
        else:
            print(
                f"Object key: {version['Key']}, Version ID: {version['VersionId']}"
            )

    # The paginator follows the key/version-id markers across truncated
    # listings, replacing the hand-written continuation loop.
    paginator = s3.get_paginator("list_object_versions")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        for version in page.get("Versions", []):
            process_object(version)
        # Delete markers are versions too; leaving them behind would mean the
        # prefix is not actually purged.
        for marker in page.get("DeleteMarkers", []):
            process_object(marker)

    if delete:
        print("Deleting glue table")
        wr.catalog.delete_table_if_exists(
            database=table["database"], table=table["name"]
        )


# Interactive driver: re-read the catalog, show the menu, and repeat until the
# user picks the exit entry (0).
while True:
    tables = get_glue_tables()
    option = display_menu(tables)
    if option != 0:
        # Menu entries are 1-based, the list is 0-based.
        selected_table = tables[option - 1]
        # First pass without delete only prints the affected object versions.
        bucket_action(selected_table)
        # Second pass deletes, but only after explicit confirmation.
        if display_sure():
            bucket_action(selected_table, delete=True)
        continue
    break
Enter fullscreen mode Exit fullscreen mode

Billboard image

The Next Generation Developer Platform

Coherence is the first Platform-as-a-Service you can control. Unlike "black-box" platforms that are opinionated about the infra you can deploy, Coherence is powered by CNC, the open-source IaC framework, which offers limitless customization.

Learn more

Top comments (0)

A Workflow Copilot. Tailored to You.

Pieces.app image

Our desktop app, with its intelligent copilot, streamlines coding by generating snippets, extracting code from screenshots, and accelerating problem-solving.

Read the docs