Managing AWS resources without proper tagging can quickly become a nightmare for cost allocation, compliance, and governance. This Python script scans all AWS regions and exports untagged resources to a structured CSV file for easy analysis and reporting.
Why Export to CSV?
While console output is useful for quick checks, CSV export provides:
- Structured data for spreadsheet analysis
- Audit trails with timestamped files
- Reporting capabilities for management
- Integration with other tools and systems
- Historical tracking of untagged resources over time (see the comparison sketch below)
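That last benefit deserves a concrete example. Each export stores one resource ARN per row (the exact columns are described below), so comparing two runs is a simple set operation. A minimal sketch, using two hypothetical report file names:

import csv

def load_arns(path):
    # Read one exported report and return its set of resource ARNs.
    with open(path, newline='') as f:
        return {row['ARN'] for row in csv.DictReader(f)}

# Hypothetical file names -- substitute two of your own reports.
old = load_arns('output/untagged_resources_20241101_090000.csv')
new = load_arns('output/untagged_resources_20241220_143052.csv')

print(f"Fixed since the older report: {len(old - new)}")
print(f"Newly untagged since then: {len(new - old)}")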
The Solution
This script builds on basic resource scanning by adding structured data export with proper ARN construction for each resource type.
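Every ARN the script emits follows AWS's general arn:partition:service:region:account-id:resource layout. For the EC2-family resources it is assembled from pieces the describe calls already return; a hypothetical helper showing the pattern:

def build_ec2_arn(region, account_id, resource_type, resource_id):
    # e.g. build_ec2_arn('us-east-1', '123456789012', 'instance', 'i-0abc123')
    #   -> arn:aws:ec2:us-east-1:123456789012:instance/i-0abc123
    return f"arn:aws:ec2:{region}:{account_id}:{resource_type}/{resource_id}"

Lambda and RDS return full ARNs directly, so the script uses those as-is.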
#!/usr/bin/env python3
import boto3
import csv
import os
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from botocore.exceptions import ClientError
def get_untagged_resources_in_region(region, account_id):
resources = []
try:
session = boto3.Session()
# EC2 Instances
ec2 = session.client('ec2', region_name=region)
instances = ec2.describe_instances()
for reservation in instances['Reservations']:
for instance in reservation['Instances']:
if not instance.get('Tags'):
resources.append({
'Account': account_id,
'Region': region,
'Resource': 'EC2 Instance',
'ARN': f"arn:aws:ec2:{region}:{account_id}:instance/{instance['InstanceId']}"
})
# EBS Volumes
volumes = ec2.describe_volumes()
for volume in volumes['Volumes']:
if not volume.get('Tags'):
resources.append({
'Account': account_id,
'Region': region,
'Resource': 'EBS Volume',
'ARN': f"arn:aws:ec2:{region}:{account_id}:volume/{volume['VolumeId']}"
})
# VPCs
vpcs = ec2.describe_vpcs()
for vpc in vpcs['Vpcs']:
if not vpc.get('Tags'):
resources.append({
'Account': account_id,
'Region': region,
'Resource': 'VPC',
'ARN': f"arn:aws:ec2:{region}:{account_id}:vpc/{vpc['VpcId']}"
})
# Security Groups
security_groups = ec2.describe_security_groups()
for sg in security_groups['SecurityGroups']:
if not sg.get('Tags'):
resources.append({
'Account': account_id,
'Region': region,
'Resource': 'Security Group',
'ARN': f"arn:aws:ec2:{region}:{account_id}:security-group/{sg['GroupId']}"
})
# Subnets
subnets = ec2.describe_subnets()
for subnet in subnets['Subnets']:
if not subnet.get('Tags'):
resources.append({
'Account': account_id,
'Region': region,
'Resource': 'Subnet',
'ARN': f"arn:aws:ec2:{region}:{account_id}:subnet/{subnet['SubnetId']}"
})
# Lambda Functions
lambda_client = session.client('lambda', region_name=region)
functions = lambda_client.list_functions()
for function in functions['Functions']:
            try:
                tags = lambda_client.list_tags(Resource=function['FunctionArn'])
                if not tags.get('Tags'):
                    resources.append({
                        'Account': account_id,
                        'Region': region,
                        'Resource': 'Lambda Function',
                        'ARN': function['FunctionArn']
                    })
            except ClientError:
                # Tag status unknown (e.g. missing lambda:ListTags permission);
                # skip rather than report the function as untagged.
                continue
# RDS Instances
rds = session.client('rds', region_name=region)
instances = rds.describe_db_instances()
for instance in instances['DBInstances']:
            try:
                tags = rds.list_tags_for_resource(ResourceName=instance['DBInstanceArn'])
                if not tags.get('TagList'):
                    resources.append({
                        'Account': account_id,
                        'Region': region,
                        'Resource': 'RDS Instance',
                        'ARN': instance['DBInstanceArn']
                    })
            except ClientError:
                # Tag status unknown; skip rather than report as untagged.
                continue
    except Exception as e:
        # Keep scanning other regions, but surface the failure instead of
        # swallowing it silently.
        print(f"Warning: could not scan {region}: {e}")
return resources
def main():
session = boto3.Session()
# Get account ID
sts = session.client('sts')
account_id = sts.get_caller_identity()['Account']
# Get regions
regions = [r['RegionName'] for r in session.client('ec2').describe_regions()['Regions']]
all_resources = []
with ThreadPoolExecutor(max_workers=15) as executor:
futures = [executor.submit(get_untagged_resources_in_region, region, account_id) for region in regions]
for future in as_completed(futures):
resources = future.result()
all_resources.extend(resources)
# S3 Buckets (global)
try:
s3 = session.client('s3')
buckets = s3.list_buckets()
for bucket in buckets['Buckets']:
            try:
                s3.get_bucket_tagging(Bucket=bucket['Name'])
            except ClientError as e:
                # get_bucket_tagging raises NoSuchTagSet for untagged buckets;
                # treat anything else (e.g. AccessDenied) as unknown, not untagged.
                if e.response['Error']['Code'] == 'NoSuchTagSet':
                    all_resources.append({
                        'Account': account_id,
                        'Region': 'Global',
                        'Resource': 'S3 Bucket',
                        'ARN': f"arn:aws:s3:::{bucket['Name']}"
                    })
    except ClientError as e:
        print(f"Warning: could not list S3 buckets: {e}")
# Create output directory and filename
os.makedirs('output', exist_ok=True)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'output/untagged_resources_{timestamp}.csv'
# Export to CSV
if all_resources:
with open(filename, 'w', newline='') as csvfile:
fieldnames = ['Account', 'Region', 'Resource', 'ARN']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(all_resources)
print(f"Exported {len(all_resources)} untagged resources to {filename}")
else:
print("No untagged resources found")
if __name__ == "__main__":
main()
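One caveat before trusting the output: the describe_* calls above return a single page of results, so accounts with very large fleets can be under-counted. boto3's built-in paginators handle the follow-up requests; a sketch of the EC2 instance scan rewritten with one (the same pattern applies to volumes, VPCs, security groups, and subnets):

def untagged_ec2_instances(ec2, region, account_id):
    # Paginated variant of the EC2 instance scan above.
    found = []
    for page in ec2.get_paginator('describe_instances').paginate():
        for reservation in page['Reservations']:
            for instance in reservation['Instances']:
                if not instance.get('Tags'):
                    found.append({
                        'Account': account_id,
                        'Region': region,
                        'Resource': 'EC2 Instance',
                        'ARN': f"arn:aws:ec2:{region}:{account_id}:instance/{instance['InstanceId']}"
                    })
    return found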
Key Features
- Account identification: Uses STS to get current AWS account ID
- Proper ARN construction: Builds correct ARNs for each resource type
- Timestamped output: Creates unique files with a YYYYMMDD_HHMMSS suffix
- Organized storage: Saves files in the output/ directory
- CSV format: Easy to import into Excel, Google Sheets, or databases
CSV Output Structure
The exported CSV contains four columns:
Account | Region | Resource | ARN
---|---|---|---
123456789012 | us-east-1 | EC2 Instance | arn:aws:ec2:us-east-1:123456789012:instance/i-1234567890abcdef0
123456789012 | Global | S3 Bucket | arn:aws:s3:::my-untagged-bucket
123456789012 | eu-west-1 | RDS Instance | arn:aws:rds:eu-west-1:123456789012:db:mydb
Setup and Usage
- Install boto3: pip install boto3
- Configure AWS credentials: aws configure (see the profile note below)
- Run the script: python get_untagged_resources_per_region_excel.py
- Find your report: output/untagged_resources_20241220_143052.csv
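The script picks up whatever the default credential chain resolves to. If you juggle multiple profiles, set the AWS_PROFILE environment variable before running it, or adapt the session construction along these lines (the profile name here is hypothetical):

import boto3

# Hypothetical: pin the scan to a named profile instead of the default chain.
session = boto3.Session(profile_name='governance-audit')
print(session.client('sts').get_caller_identity()['Account'])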
Required IAM Permissions
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"sts:GetCallerIdentity",
"ec2:DescribeInstances",
"ec2:DescribeVolumes",
"ec2:DescribeVpcs",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSubnets",
"ec2:DescribeRegions",
"s3:ListAllMyBuckets",
"s3:GetBucketTagging",
"lambda:ListFunctions",
"lambda:ListTags",
"rds:DescribeDBInstances",
"rds:ListTagsForResource"
],
"Resource": "*"
}
]
}
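With the policy attached, a quick pre-flight check can confirm that credentials resolve and the baseline calls succeed before you kick off a full multi-region scan. A minimal sketch using the same two calls the script makes first:

import boto3
from botocore.exceptions import ClientError

session = boto3.Session()
try:
    account = session.client('sts').get_caller_identity()['Account']
    regions = session.client('ec2').describe_regions()['Regions']
    print(f"Ready to scan account {account} across {len(regions)} regions")
except ClientError as e:
    print(f"Credential or permission problem: {e}")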
Use Cases for CSV Export
- Compliance reporting: Generate monthly untagged resource reports
- Cost analysis: Import into BI tools for cost allocation analysis
- Automation: Feed data into tagging automation scripts (see the sketch after this list)
- Tracking progress: Compare reports over time to measure tagging improvements
- Multi-account analysis: Combine reports from multiple AWS accounts
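For the automation use case, the ARN column is exactly what the Resource Groups Tagging API consumes. A minimal remediation sketch, assuming you want to stamp every listed resource with a placeholder owner tag; note that the Tagging API is regional, accepts up to 20 ARNs per call, and requires tag:TagResources plus service-specific tagging permissions beyond the policy above:

import csv
from collections import defaultdict
import boto3

def tag_from_report(filename, tags):
    # Group ARNs by region, since the Tagging API client is regional.
    by_region = defaultdict(list)
    with open(filename, newline='') as f:
        for row in csv.DictReader(f):
            # S3 rows are marked 'Global' in the report; assume us-east-1 for those.
            region = row['Region'] if row['Region'] != 'Global' else 'us-east-1'
            by_region[region].append(row['ARN'])
    for region, arns in by_region.items():
        client = boto3.client('resourcegroupstaggingapi', region_name=region)
        for i in range(0, len(arns), 20):  # batch limit: 20 ARNs per call
            resp = client.tag_resources(ResourceARNList=arns[i:i + 20], Tags=tags)
            for arn, info in resp.get('FailedResourcesMap', {}).items():
                print(f"Failed to tag {arn}: {info.get('ErrorMessage')}")

# Hypothetical default tag -- replace with values from your tagging policy.
tag_from_report('output/untagged_resources_20241220_143052.csv', {'Owner': 'unassigned'})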
Advanced Analysis
Once you have the CSV, you can analyze it without external dependencies:
#!/usr/bin/env python3
import csv
import glob
import os
from collections import Counter
def analyze_csv(filename):
"""Analyze untagged resources CSV without pandas"""
resources = []
try:
        with open(filename, 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile)
resources = list(reader)
except FileNotFoundError:
print(f"File {filename} not found")
return
if not resources:
print("No data found in CSV")
return
print(f"Total untagged resources: {len(resources)}")
# Count by resource type
resource_counts = Counter(row['Resource'] for row in resources)
print("\nResources by type:")
for resource_type, count in resource_counts.most_common():
print(f" {resource_type}: {count}")
# Count by region
region_counts = Counter(row['Region'] for row in resources)
print("\nResources by region:")
for region, count in region_counts.most_common():
print(f" {region}: {count}")
# Filter EC2 instances
ec2_instances = [row for row in resources if row['Resource'] == 'EC2 Instance']
if ec2_instances:
print(f"\nEC2 Instances ({len(ec2_instances)}):")
for instance in ec2_instances[:5]: # Show first 5
print(f" {instance['Region']}: {instance['ARN']}")
if len(ec2_instances) > 5:
print(f" ... and {len(ec2_instances) - 5} more")
if __name__ == "__main__":
    # Find the latest CSV file in output/
csv_files = glob.glob('output/untagged_resources_*.csv')
if csv_files:
latest_file = max(csv_files, key=os.path.getctime)
print(f"Analyzing: {latest_file}\n")
analyze_csv(latest_file)
else:
print("No CSV files found in output/ directory")
print("Run get_untagged_resources_per_region_excel.py first")
Or use the included analysis script:
python advanced_analysis.py
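The standard-library version keeps the dependency count at zero, but if pandas happens to be installed, the same breakdowns collapse to a few lines:

import pandas as pd

# Same example file name as above.
df = pd.read_csv('output/untagged_resources_20241220_143052.csv')
print(f"Total untagged resources: {len(df)}")
print(df['Resource'].value_counts())  # count by resource type
print(df['Region'].value_counts())  # count by region
print(df[df['Resource'] == 'EC2 Instance'].head())  # first few EC2 rows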
Conclusion
Structured data export transforms untagged resource discovery from a one-time check into a systematic governance process. The CSV format enables integration with existing reporting workflows and provides the foundation for automated remediation efforts.
Regular exports help track tagging compliance over time and provide the data needed for informed decisions about AWS resource governance policies.