DEV Community

Dmitry Romanoff
Dmitry Romanoff

Posted on

Export AWS Untagged Resources to CSV for Analysis and Reporting

Managing AWS resources without proper tagging can quickly become a nightmare for cost allocation, compliance, and governance. This Python script scans all AWS regions and exports untagged resources to a structured CSV file for easy analysis and reporting.

Why Export to CSV?

While console output is useful for quick checks, CSV export provides:

  • Structured data for spreadsheet analysis
  • Audit trails with timestamped files
  • Reporting capabilities for management
  • Integration with other tools and systems
  • Historical tracking of untagged resources over time

The Solution

This script builds on basic resource scanning by adding structured data export with proper ARN construction for each resource type.

#!/usr/bin/env python3
import boto3
import csv
import os
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed

def get_untagged_resources_in_region(region, account_id):
    """Return untagged resources found in a single AWS region.

    Scans EC2 instances, EBS volumes, VPCs, security groups, subnets,
    Lambda functions and RDS instances. Every listing call is paginated
    so large accounts are fully covered (the plain describe_*/list_*
    calls truncate their results).

    Args:
        region: AWS region name, e.g. ``us-east-1``.
        account_id: AWS account ID, used to construct ARNs for EC2-family
            resources whose APIs return only an ID.

    Returns:
        list[dict]: rows with ``Account``/``Region``/``Resource``/``ARN``
        keys, ready for the CSV writer in ``main()``. On a per-region
        failure (disabled region, missing permission) the partial result
        collected so far is returned — the scan is best-effort.
    """
    resources = []

    def record(resource_type, arn):
        # Single row shape shared by every resource type below.
        resources.append({
            'Account': account_id,
            'Region': region,
            'Resource': resource_type,
            'ARN': arn,
        })

    try:
        session = boto3.Session()
        ec2 = session.client('ec2', region_name=region)

        # EC2 Instances
        for page in ec2.get_paginator('describe_instances').paginate():
            for reservation in page['Reservations']:
                for instance in reservation['Instances']:
                    if not instance.get('Tags'):
                        record('EC2 Instance',
                               f"arn:aws:ec2:{region}:{account_id}:instance/{instance['InstanceId']}")

        # EBS Volumes
        for page in ec2.get_paginator('describe_volumes').paginate():
            for volume in page['Volumes']:
                if not volume.get('Tags'):
                    record('EBS Volume',
                           f"arn:aws:ec2:{region}:{account_id}:volume/{volume['VolumeId']}")

        # VPCs
        for page in ec2.get_paginator('describe_vpcs').paginate():
            for vpc in page['Vpcs']:
                if not vpc.get('Tags'):
                    record('VPC',
                           f"arn:aws:ec2:{region}:{account_id}:vpc/{vpc['VpcId']}")

        # Security Groups
        for page in ec2.get_paginator('describe_security_groups').paginate():
            for sg in page['SecurityGroups']:
                if not sg.get('Tags'):
                    record('Security Group',
                           f"arn:aws:ec2:{region}:{account_id}:security-group/{sg['GroupId']}")

        # Subnets
        for page in ec2.get_paginator('describe_subnets').paginate():
            for subnet in page['Subnets']:
                if not subnet.get('Tags'):
                    record('Subnet',
                           f"arn:aws:ec2:{region}:{account_id}:subnet/{subnet['SubnetId']}")

        # Lambda Functions — tags require a separate API call per function.
        lambda_client = session.client('lambda', region_name=region)
        for page in lambda_client.get_paginator('list_functions').paginate():
            for function in page['Functions']:
                try:
                    tags = lambda_client.list_tags(Resource=function['FunctionArn'])
                    if not tags.get('Tags'):
                        record('Lambda Function', function['FunctionArn'])
                except Exception:
                    # If the tag lookup fails, report the function as
                    # untagged rather than silently skipping it.
                    record('Lambda Function', function['FunctionArn'])

        # RDS Instances — same per-resource tag lookup as Lambda.
        rds = session.client('rds', region_name=region)
        for page in rds.get_paginator('describe_db_instances').paginate():
            for instance in page['DBInstances']:
                try:
                    tags = rds.list_tags_for_resource(ResourceName=instance['DBInstanceArn'])
                    if not tags.get('TagList'):
                        record('RDS Instance', instance['DBInstanceArn'])
                except Exception:
                    record('RDS Instance', instance['DBInstanceArn'])

    except Exception:
        # A disabled region or a missing permission must not abort the
        # whole multi-region scan; return whatever was collected.
        # (Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # still propagate.)
        pass

    return resources

def main():
    """Scan every enabled region in parallel and export untagged resources to CSV.

    Writes a timestamped report to ``output/untagged_resources_<ts>.csv``
    with columns Account, Region, Resource, ARN.
    """
    session = boto3.Session()

    # Account ID is needed to build ARNs for EC2-family resources whose
    # APIs return only an ID.
    sts = session.client('sts')
    account_id = sts.get_caller_identity()['Account']

    # All regions currently visible to this account.
    regions = [r['RegionName'] for r in session.client('ec2').describe_regions()['Regions']]

    all_resources = []

    # Regions are independent, so scan them concurrently.
    with ThreadPoolExecutor(max_workers=15) as executor:
        futures = [executor.submit(get_untagged_resources_in_region, region, account_id)
                   for region in regions]
        for future in as_completed(futures):
            all_resources.extend(future.result())

    # S3 bucket names are a global namespace, so handle them once outside
    # the per-region loop. get_bucket_tagging raises (NoSuchTagSet) when a
    # bucket has no tags, so an exception here is treated as "untagged".
    try:
        s3 = session.client('s3')
        for bucket in s3.list_buckets()['Buckets']:
            try:
                s3.get_bucket_tagging(Bucket=bucket['Name'])
            except Exception:
                all_resources.append({
                    'Account': account_id,
                    'Region': 'Global',
                    'Resource': 'S3 Bucket',
                    'ARN': f"arn:aws:s3:::{bucket['Name']}"
                })
    except Exception:
        # Best-effort: a missing s3:ListAllMyBuckets permission should not
        # kill the report for the other services. (Was a bare `except:`.)
        pass

    # Timestamped file in output/ gives a unique, sortable audit trail.
    os.makedirs('output', exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f'output/untagged_resources_{timestamp}.csv'

    if all_resources:
        with open(filename, 'w', newline='') as csvfile:
            fieldnames = ['Account', 'Region', 'Resource', 'ARN']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_resources)
        # Fix: the message printed a literal "(unknown)" placeholder
        # instead of interpolating the generated file name.
        print(f"Exported {len(all_resources)} untagged resources to {filename}")
    else:
        print("No untagged resources found")

if __name__ == "__main__":
    main()
Enter fullscreen mode Exit fullscreen mode

Key Features

  • Account identification: Uses STS to get current AWS account ID
  • Proper ARN construction: Builds correct ARNs for each resource type
  • Timestamped output: Creates unique files with YYYYMMDD_HHMMSS format
  • Organized storage: Saves files in output/ directory
  • CSV format: Easy to import into Excel, Google Sheets, or databases

CSV Output Structure

The exported CSV contains four columns:

Account Region Resource ARN
123456789012 us-east-1 EC2 Instance arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0
123456789012 Global S3 Bucket arn:aws:s3:::my-untagged-bucket
123456789012 eu-west-1 RDS Instance arn:aws:rds:eu-west-1:123456789012:db:mydb

Setup and Usage

  1. Install boto3:
pip install boto3
Enter fullscreen mode Exit fullscreen mode
  2. Configure AWS credentials:
aws configure
Enter fullscreen mode Exit fullscreen mode
  3. Run the script:
python get_untagged_resources_per_region_excel.py
Enter fullscreen mode Exit fullscreen mode
  4. Find your report:
output/untagged_resources_20241220_143052.csv
Enter fullscreen mode Exit fullscreen mode

Required IAM Permissions

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "sts:GetCallerIdentity",
                "ec2:DescribeInstances",
                "ec2:DescribeVolumes",
                "ec2:DescribeVpcs",
                "ec2:DescribeSecurityGroups",
                "ec2:DescribeSubnets",
                "ec2:DescribeRegions",
                "s3:ListAllMyBuckets",
                "s3:GetBucketTagging",
                "lambda:ListFunctions",
                "lambda:ListTags",
                "rds:DescribeDBInstances",
                "rds:ListTagsForResource"
            ],
            "Resource": "*"
        }
    ]
}
Enter fullscreen mode Exit fullscreen mode

Use Cases for CSV Export

  1. Compliance reporting: Generate monthly untagged resource reports
  2. Cost analysis: Import into BI tools for cost allocation analysis
  3. Automation: Feed data into tagging automation scripts
  4. Tracking progress: Compare reports over time to measure tagging improvements
  5. Multi-account analysis: Combine reports from multiple AWS accounts

Advanced Analysis

Once you have the CSV, you can analyze it without external dependencies:

#!/usr/bin/env python3
import csv
from collections import Counter

def analyze_csv(filename):
    """Summarize an untagged-resources CSV on stdout (no pandas needed).

    Prints the total row count, per-resource-type and per-region frequency
    tables, and a preview of up to five untagged EC2 instances.

    Args:
        filename: path to a CSV produced by the export script, with
            'Account', 'Region', 'Resource' and 'ARN' columns.

    Returns:
        None. All output goes to stdout; a missing file is reported
        rather than raised.
    """
    try:
        with open(filename, 'r') as csvfile:
            resources = list(csv.DictReader(csvfile))
    except FileNotFoundError:
        # Fix: the message printed a literal "(unknown)" placeholder
        # instead of interpolating the requested path.
        print(f"File {filename} not found")
        return

    if not resources:
        print("No data found in CSV")
        return

    print(f"Total untagged resources: {len(resources)}")

    # Count by resource type
    resource_counts = Counter(row['Resource'] for row in resources)
    print("\nResources by type:")
    for resource_type, count in resource_counts.most_common():
        print(f"  {resource_type}: {count}")

    # Count by region
    region_counts = Counter(row['Region'] for row in resources)
    print("\nResources by region:")
    for region, count in region_counts.most_common():
        print(f"  {region}: {count}")

    # Preview untagged EC2 instances, capped at 5 rows to keep output short.
    ec2_instances = [row for row in resources if row['Resource'] == 'EC2 Instance']
    if ec2_instances:
        print(f"\nEC2 Instances ({len(ec2_instances)}):")
        for instance in ec2_instances[:5]:
            print(f"  {instance['Region']}: {instance['ARN']}")
        if len(ec2_instances) > 5:
            print(f"  ... and {len(ec2_instances) - 5} more")

if __name__ == "__main__":
    # Locate the most recently created report in output/ and analyze it.
    import os
    import glob

    candidates = glob.glob('output/untagged_resources_*.csv')
    if not candidates:
        print("No CSV files found in output/ directory")
        print("Run get_untagged_resources_per_region_excel.py first")
    else:
        newest = max(candidates, key=os.path.getctime)
        print(f"Analyzing: {newest}\n")
        analyze_csv(newest)
Enter fullscreen mode Exit fullscreen mode

Or use the included analysis script:

python advanced_analysis.py
Enter fullscreen mode Exit fullscreen mode

Conclusion

Structured data export transforms untagged resource discovery from a one-time check into a systematic governance process. The CSV format enables integration with existing reporting workflows and provides the foundation for automated remediation efforts.

Regular exports help track tagging compliance over time and provide the data needed for informed decisions about AWS resource governance policies.

Top comments (0)