DEV Community

Dmitry Romanoff
Dmitry Romanoff

Posted on

Export AWS Untagged Resources to CSV for Analysis and Reporting

Managing AWS resources without proper tagging can quickly become a nightmare for cost allocation, compliance, and governance. This Python script scans all AWS regions and exports untagged resources to a structured CSV file for easy analysis and reporting.

Why Export to CSV?

While console output is useful for quick checks, CSV export provides:

  • Structured data for spreadsheet analysis
  • Audit trails with timestamped files
  • Reporting capabilities for management
  • Integration with other tools and systems
  • Historical tracking of untagged resources over time

The Solution

This script builds on basic resource scanning by adding structured data export with proper ARN construction for each resource type.

#!/usr/bin/env python3
import boto3
import csv
import os
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed

def get_untagged_resources_in_region(region, account_id):
    """Return untagged resources found in a single AWS region.

    Scans EC2 instances, EBS volumes, VPCs, security groups, subnets,
    Lambda functions and RDS instances. Every listing call is paginated
    so large accounts are fully covered (the plain describe_*/list_*
    calls truncate their results).

    Args:
        region: AWS region name, e.g. ``us-east-1``.
        account_id: AWS account ID, used to construct ARNs for EC2-family
            resources whose APIs return only an ID.

    Returns:
        list[dict]: rows with ``Account``/``Region``/``Resource``/``ARN``
        keys, ready for the CSV writer in ``main()``. On a per-region
        failure (disabled region, missing permission) the partial result
        collected so far is returned — the scan is best-effort.
    """
    resources = []

    def record(resource_type, arn):
        # Single row shape shared by every resource type below.
        resources.append({
            'Account': account_id,
            'Region': region,
            'Resource': resource_type,
            'ARN': arn,
        })

    try:
        session = boto3.Session()
        ec2 = session.client('ec2', region_name=region)

        # EC2 Instances
        for page in ec2.get_paginator('describe_instances').paginate():
            for reservation in page['Reservations']:
                for instance in reservation['Instances']:
                    if not instance.get('Tags'):
                        record('EC2 Instance',
                               f"arn:aws:ec2:{region}:{account_id}:instance/{instance['InstanceId']}")

        # EBS Volumes
        for page in ec2.get_paginator('describe_volumes').paginate():
            for volume in page['Volumes']:
                if not volume.get('Tags'):
                    record('EBS Volume',
                           f"arn:aws:ec2:{region}:{account_id}:volume/{volume['VolumeId']}")

        # VPCs
        for page in ec2.get_paginator('describe_vpcs').paginate():
            for vpc in page['Vpcs']:
                if not vpc.get('Tags'):
                    record('VPC',
                           f"arn:aws:ec2:{region}:{account_id}:vpc/{vpc['VpcId']}")

        # Security Groups
        for page in ec2.get_paginator('describe_security_groups').paginate():
            for sg in page['SecurityGroups']:
                if not sg.get('Tags'):
                    record('Security Group',
                           f"arn:aws:ec2:{region}:{account_id}:security-group/{sg['GroupId']}")

        # Subnets
        for page in ec2.get_paginator('describe_subnets').paginate():
            for subnet in page['Subnets']:
                if not subnet.get('Tags'):
                    record('Subnet',
                           f"arn:aws:ec2:{region}:{account_id}:subnet/{subnet['SubnetId']}")

        # Lambda Functions — tags require a separate API call per function.
        lambda_client = session.client('lambda', region_name=region)
        for page in lambda_client.get_paginator('list_functions').paginate():
            for function in page['Functions']:
                try:
                    tags = lambda_client.list_tags(Resource=function['FunctionArn'])
                    if not tags.get('Tags'):
                        record('Lambda Function', function['FunctionArn'])
                except Exception:
                    # If the tag lookup fails, report the function as
                    # untagged rather than silently skipping it.
                    record('Lambda Function', function['FunctionArn'])

        # RDS Instances — same per-resource tag lookup as Lambda.
        rds = session.client('rds', region_name=region)
        for page in rds.get_paginator('describe_db_instances').paginate():
            for instance in page['DBInstances']:
                try:
                    tags = rds.list_tags_for_resource(ResourceName=instance['DBInstanceArn'])
                    if not tags.get('TagList'):
                        record('RDS Instance', instance['DBInstanceArn'])
                except Exception:
                    record('RDS Instance', instance['DBInstanceArn'])

    except Exception:
        # A disabled region or a missing permission must not abort the
        # whole multi-region scan; return whatever was collected.
        # (Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # still propagate.)
        pass

    return resources

def main():
    """Scan every enabled region in parallel and export untagged resources to CSV.

    Writes a timestamped report to ``output/untagged_resources_<ts>.csv``
    with columns Account, Region, Resource, ARN.
    """
    session = boto3.Session()

    # Account ID is needed to build ARNs for EC2-family resources whose
    # APIs return only an ID.
    sts = session.client('sts')
    account_id = sts.get_caller_identity()['Account']

    # All regions currently visible to this account.
    regions = [r['RegionName'] for r in session.client('ec2').describe_regions()['Regions']]

    all_resources = []

    # Regions are independent, so scan them concurrently.
    with ThreadPoolExecutor(max_workers=15) as executor:
        futures = [executor.submit(get_untagged_resources_in_region, region, account_id)
                   for region in regions]
        for future in as_completed(futures):
            all_resources.extend(future.result())

    # S3 bucket names are a global namespace, so handle them once outside
    # the per-region loop. get_bucket_tagging raises (NoSuchTagSet) when a
    # bucket has no tags, so an exception here is treated as "untagged".
    try:
        s3 = session.client('s3')
        for bucket in s3.list_buckets()['Buckets']:
            try:
                s3.get_bucket_tagging(Bucket=bucket['Name'])
            except Exception:
                all_resources.append({
                    'Account': account_id,
                    'Region': 'Global',
                    'Resource': 'S3 Bucket',
                    'ARN': f"arn:aws:s3:::{bucket['Name']}"
                })
    except Exception:
        # Best-effort: a missing s3:ListAllMyBuckets permission should not
        # kill the report for the other services. (Was a bare `except:`.)
        pass

    # Timestamped file in output/ gives a unique, sortable audit trail.
    os.makedirs('output', exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f'output/untagged_resources_{timestamp}.csv'

    if all_resources:
        with open(filename, 'w', newline='') as csvfile:
            fieldnames = ['Account', 'Region', 'Resource', 'ARN']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_resources)
        # Fix: the message printed a literal "(unknown)" placeholder
        # instead of interpolating the generated file name.
        print(f"Exported {len(all_resources)} untagged resources to {filename}")
    else:
        print("No untagged resources found")

if __name__ == "__main__":
    main()
Enter fullscreen mode Exit fullscreen mode

Key Features

  • Account identification: Uses STS to get current AWS account ID
  • Proper ARN construction: Builds correct ARNs for each resource type
  • Timestamped output: Creates unique files with YYYYMMDD_HHMMSS format
  • Organized storage: Saves files in output/ directory
  • CSV format: Easy to import into Excel, Google Sheets, or databases

CSV Output Structure

The exported CSV contains four columns:

Account Region Resource ARN
123456789012 us-east-1 EC2 Instance arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0
123456789012 Global S3 Bucket arn:aws:s3:::my-untagged-bucket
123456789012 eu-west-1 RDS Instance arn:aws:rds:eu-west-1:123456789012:db:mydb

Setup and Usage

  1. Install boto3:
pip install boto3
Enter fullscreen mode Exit fullscreen mode
  2. Configure AWS credentials:
aws configure
Enter fullscreen mode Exit fullscreen mode
  3. Run the script:
python get_untagged_resources_per_region_excel.py
Enter fullscreen mode Exit fullscreen mode
  4. Find your report:
output/untagged_resources_20241220_143052.csv
Enter fullscreen mode Exit fullscreen mode

Required IAM Permissions

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "sts:GetCallerIdentity",
                "ec2:DescribeInstances",
                "ec2:DescribeVolumes",
                "ec2:DescribeVpcs",
                "ec2:DescribeSecurityGroups",
                "ec2:DescribeSubnets",
                "ec2:DescribeRegions",
                "s3:ListAllMyBuckets",
                "s3:GetBucketTagging",
                "lambda:ListFunctions",
                "lambda:ListTags",
                "rds:DescribeDBInstances",
                "rds:ListTagsForResource"
            ],
            "Resource": "*"
        }
    ]
}
Enter fullscreen mode Exit fullscreen mode

Use Cases for CSV Export

  1. Compliance reporting: Generate monthly untagged resource reports
  2. Cost analysis: Import into BI tools for cost allocation analysis
  3. Automation: Feed data into tagging automation scripts
  4. Tracking progress: Compare reports over time to measure tagging improvements
  5. Multi-account analysis: Combine reports from multiple AWS accounts

Advanced Analysis

Once you have the CSV, you can analyze it without external dependencies:

#!/usr/bin/env python3
import csv
from collections import Counter

def analyze_csv(filename):
    """Summarize an untagged-resources CSV on stdout (no pandas needed).

    Prints the total row count, per-resource-type and per-region frequency
    tables, and a preview of up to five untagged EC2 instances.

    Args:
        filename: path to a CSV produced by the export script, with
            'Account', 'Region', 'Resource' and 'ARN' columns.

    Returns:
        None. All output goes to stdout; a missing file is reported
        rather than raised.
    """
    try:
        with open(filename, 'r') as csvfile:
            resources = list(csv.DictReader(csvfile))
    except FileNotFoundError:
        # Fix: the message printed a literal "(unknown)" placeholder
        # instead of interpolating the requested path.
        print(f"File {filename} not found")
        return

    if not resources:
        print("No data found in CSV")
        return

    print(f"Total untagged resources: {len(resources)}")

    # Count by resource type
    resource_counts = Counter(row['Resource'] for row in resources)
    print("\nResources by type:")
    for resource_type, count in resource_counts.most_common():
        print(f"  {resource_type}: {count}")

    # Count by region
    region_counts = Counter(row['Region'] for row in resources)
    print("\nResources by region:")
    for region, count in region_counts.most_common():
        print(f"  {region}: {count}")

    # Preview untagged EC2 instances, capped at 5 rows to keep output short.
    ec2_instances = [row for row in resources if row['Resource'] == 'EC2 Instance']
    if ec2_instances:
        print(f"\nEC2 Instances ({len(ec2_instances)}):")
        for instance in ec2_instances[:5]:
            print(f"  {instance['Region']}: {instance['ARN']}")
        if len(ec2_instances) > 5:
            print(f"  ... and {len(ec2_instances) - 5} more")

if __name__ == "__main__":
    # Locate the most recently created report in output/ and analyze it.
    import os
    import glob

    candidates = glob.glob('output/untagged_resources_*.csv')
    if not candidates:
        print("No CSV files found in output/ directory")
        print("Run get_untagged_resources_per_region_excel.py first")
    else:
        newest = max(candidates, key=os.path.getctime)
        print(f"Analyzing: {newest}\n")
        analyze_csv(newest)
Enter fullscreen mode Exit fullscreen mode

Or use the included analysis script:

python advanced_analysis.py
Enter fullscreen mode Exit fullscreen mode

Conclusion

Structured data export transforms untagged resource discovery from a one-time check into a systematic governance process. The CSV format enables integration with existing reporting workflows and provides the foundation for automated remediation efforts.

Regular exports help track tagging compliance over time and provide the data needed for informed decisions about AWS resource governance policies.

Top comments (0)