MinIO is a high-performance, S3-compatible object storage server you can run anywhere — your laptop, Kubernetes, or bare metal. Since it's 100% S3-compatible, you can use the same AWS SDK code you already know.
## Why Use MinIO?
- S3-compatible — use existing AWS tools and SDKs
- Self-hosted — your data stays on your infrastructure
- Free and open-source — no storage costs or egress fees
- Fast — designed for AI/ML workloads and big data
## Getting Started

```bash
# Run MinIO with Docker
docker run -p 9000:9000 -p 9001:9001 \
  -e MINIO_ROOT_USER=minioadmin \
  -e MINIO_ROOT_PASSWORD=minioadmin \
  minio/minio server /data --console-address ":9001"

# Use the AWS CLI (it works with MinIO!)
aws --endpoint-url http://localhost:9000 s3 mb s3://my-bucket
aws --endpoint-url http://localhost:9000 s3 cp myfile.txt s3://my-bucket/
aws --endpoint-url http://localhost:9000 s3 ls s3://my-bucket/
```
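Before wiring up SDK clients, it helps to confirm the server is actually up. MinIO exposes an unauthenticated liveness endpoint at `/minio/health/live`; here is a small probe sketch (the endpoint and timeout values are illustrative):

```python
import urllib.request


def minio_is_live(endpoint: str = 'http://localhost:9000') -> bool:
    """Probe MinIO's unauthenticated liveness endpoint."""
    try:
        with urllib.request.urlopen(f'{endpoint}/minio/health/live', timeout=2) as resp:
            return resp.status == 200
    except OSError:
        # Connection refused, timeout, DNS failure, etc.
        return False


print(minio_is_live())
```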
## Python Client (boto3)

```python
import json

import boto3
from botocore.client import Config

s3 = boto3.client(
    's3',
    endpoint_url='http://localhost:9000',
    aws_access_key_id='minioadmin',
    aws_secret_access_key='minioadmin',
    config=Config(signature_version='s3v4'),
)

# Create bucket
s3.create_bucket(Bucket='my-data')

# Upload file
s3.upload_file('report.pdf', 'my-data', 'reports/2026/march.pdf')

# Upload from memory
data = json.dumps({'key': 'value', 'count': 42})
s3.put_object(Bucket='my-data', Key='config.json', Body=data, ContentType='application/json')

# List files
response = s3.list_objects_v2(Bucket='my-data', Prefix='reports/')
for obj in response.get('Contents', []):
    print(f"{obj['Key']:40s} {obj['Size']:>10d} bytes {obj['LastModified']}")

# Download file
s3.download_file('my-data', 'reports/2026/march.pdf', 'downloaded.pdf')
```
## MinIO Python SDK

```python
import io
from datetime import timedelta

from minio import Minio
from minio.error import S3Error

client = Minio('localhost:9000', access_key='minioadmin', secret_key='minioadmin', secure=False)

# Create bucket if it doesn't exist
if not client.bucket_exists('my-bucket'):
    client.make_bucket('my-bucket')

# Upload with metadata
payload = b'{"users": []}'
client.put_object(
    'my-bucket', 'data/users.json',
    io.BytesIO(payload),
    length=len(payload),
    content_type='application/json',
    metadata={'x-amz-meta-source': 'api', 'x-amz-meta-version': '1.0'},
)

# Generate presigned download URL (shareable link)
url = client.presigned_get_object('my-bucket', 'data/users.json', expires=timedelta(hours=1))
print(f"Download URL (valid 1 hour): {url}")

# Generate presigned upload URL
upload_url = client.presigned_put_object('my-bucket', 'uploads/new-file.txt', expires=timedelta(hours=1))
print(f"Upload URL: {upload_url}")
```
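A presigned URL needs no MinIO SDK on the consumer's side; any HTTP client can use it. A sketch with Python's standard library (the URLs would come from `presigned_put_object` and `presigned_get_object` as shown above):

```python
import urllib.request


def upload_via_presigned_url(url: str, payload: bytes) -> int:
    """PUT raw bytes to a presigned upload URL; returns the HTTP status code."""
    req = urllib.request.Request(url, data=payload, method='PUT')
    with urllib.request.urlopen(req) as resp:
        return resp.status


def download_via_presigned_url(url: str) -> bytes:
    """GET an object through a presigned download URL."""
    with urllib.request.urlopen(url) as resp:
        return resp.read()
```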
## Event Notifications

```python
# Listen for bucket events (blocks until events arrive)
with client.listen_bucket_notification(
    'my-bucket', prefix='uploads/', events=['s3:ObjectCreated:*']
) as events:
    for event in events:
        for record in event.get('Records', []):
            key = record['s3']['object']['key']
            size = record['s3']['object']['size']
            print(f"New file uploaded: {key} ({size} bytes)")
            # Process the file...
```
## Data Pipeline: ETL with MinIO

```python
import io

import pandas as pd


def etl_pipeline(source_bucket, dest_bucket, source_key):
    # Extract: read CSV from MinIO (s3 is the boto3 client from above)
    response = s3.get_object(Bucket=source_bucket, Key=source_key)
    df = pd.read_csv(io.BytesIO(response['Body'].read()))
    print(f"Extracted {len(df)} rows from {source_key}")

    # Transform: compute line totals, then summarize by month
    df['total'] = df['quantity'] * df['price']
    df['date'] = pd.to_datetime(df['date'])
    summary = df.groupby(df['date'].dt.to_period('M')).agg({
        'total': ['sum', 'mean', 'count']
    }).reset_index()

    # Load: write results back to MinIO
    csv_bytes = summary.to_csv(index=False).encode('utf-8')
    s3.put_object(
        Bucket=dest_bucket,
        Key='reports/monthly-summary.csv',
        Body=csv_bytes,
        ContentType='text/csv',
    )
    print(f"Summary saved to {dest_bucket}/reports/monthly-summary.csv")


etl_pipeline('raw-data', 'processed-data', 'sales/2026-q1.csv')
```
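The transform step is pure pandas, so it can be unit-tested without any server. A sketch using named aggregation (which yields flat column names instead of the MultiIndex the `agg` call above produces); the sample rows are made up:

```python
import pandas as pd


def transform(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df['total'] = df['quantity'] * df['price']
    df['date'] = pd.to_datetime(df['date'])
    # One row per month: total revenue, mean order value, order count
    return df.groupby(df['date'].dt.to_period('M')).agg(
        total_sum=('total', 'sum'),
        total_mean=('total', 'mean'),
        order_count=('total', 'count'),
    ).reset_index()


sample = pd.DataFrame({
    'date': ['2026-01-05', '2026-01-20', '2026-02-03'],
    'quantity': [2, 1, 4],
    'price': [10.0, 30.0, 5.0],
})
summary = transform(sample)
print(summary)
# January: 50.0 total over 2 orders; February: 20.0 total over 1 order
```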
## Lifecycle Policies

```python
lifecycle_config = {
    'Rules': [{
        'ID': 'expire-old-logs',
        'Status': 'Enabled',
        'Filter': {'Prefix': 'logs/'},
        'Expiration': {'Days': 30}
    }, {
        'ID': 'archive-reports',
        'Status': 'Enabled',
        'Filter': {'Prefix': 'reports/'},
        # Note: on MinIO, the transition target must first be configured
        # as a remote storage tier (e.g. with `mc ilm tier add`)
        'Transitions': [{
            'Days': 90,
            'StorageClass': 'GLACIER'
        }]
    }]
}

s3.put_bucket_lifecycle_configuration(Bucket='my-data', LifecycleConfiguration=lifecycle_config)
print("Lifecycle policy applied!")
```
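Versioning pairs well with lifecycle rules for backup workloads, and it uses the same standard S3 calls. A sketch built on the boto3 client from above; the helper names are placeholders:

```python
def enable_versioning(s3_client, bucket: str) -> None:
    """Turn on object versioning for a bucket (standard S3 API; MinIO supports it)."""
    s3_client.put_bucket_versioning(
        Bucket=bucket,
        VersioningConfiguration={'Status': 'Enabled'},
    )


def versioning_status(s3_client, bucket: str) -> str:
    """Return 'Enabled', 'Suspended', or '' if versioning was never configured."""
    resp = s3_client.get_bucket_versioning(Bucket=bucket)
    return resp.get('Status', '')
```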
## Real-World Use Case
A machine learning team stored 50 TB of training data on AWS S3, costing $1,150/month in storage alone plus egress fees. They migrated to MinIO running on their existing GPU servers with zero code changes: the same S3 API calls worked after swapping the endpoint URL. Storage spend dropped to $0 beyond hardware they already owned, and data access became roughly 10x faster because reads were now local to the cluster.
## What You Can Build
- Self-hosted S3 for development and testing
- Data lake for analytics and ML pipelines
- Backup system with versioning and lifecycle policies
- File sharing service with presigned URLs
- Media storage for user uploads in your app
Need custom storage solutions? I build data pipelines and infrastructure tools.
Email me: spinov001@gmail.com
Check out my developer tools: https://apify.com/spinov001