Mox Loop

Posted on Jan 23

Building an Amazon Advertising ROI Optimization System with Python

#python #datascience #ecommerce #automation

Amazon advertising in 2026 is a data game. Top sellers have reduced their ACoS (Advertising Cost of Sales) from 35% to 18%, while others struggle above 50%. The difference? Data-driven automation.

In this tutorial, I'll show you how to build a complete Amazon advertising optimization system using Python. We'll cover:

Data collection via API
Metrics calculation and analysis
Automated bid optimization
Real-time monitoring

Prerequisites

pip install requests pandas numpy scikit-learn plotly matplotlib seaborn schedule

You'll also need:

Amazon Advertising API access (or use Pangolinfo Scrape API as a proxy)
Basic Python knowledge
Understanding of Amazon PPC concepts (ACoS, ROAS, CPC, CVR)

Step 1: Data Collection

First, let's fetch advertising data:

import requests
import pandas as pd
from datetime import datetime, timedelta

class AmazonAdDataCollector:
    """Collect Amazon advertising data via API.

    Args:
        api_key: Token sent as a Bearer credential in the ``Authorization``
            header of every request.
        timeout: Seconds to wait for the server before a request is
            abandoned. Defaults to 30.
    """

    def __init__(self, api_key, timeout=30):
        self.api_key = api_key
        self.base_url = "https://api.pangolinfo.com/v1"
        self.timeout = timeout

    def fetch_campaigns(self, days=7):
        """Fetch campaign performance data for the trailing ``days`` days.

        Args:
            days: Length of the reporting window, counted back from now.

        Returns:
            A DataFrame built from the ``campaigns`` entry of the JSON body.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If no response arrives within ``self.timeout``.
            KeyError: If the JSON body has no ``campaigns`` entry.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        url = f"{self.base_url}/amazon/advertising/campaigns"
        params = {
            'start_date': start_date.strftime('%Y-%m-%d'),
            'end_date': end_date.strftime('%Y-%m-%d'),
            'marketplace': 'US'
        }
        headers = {'Authorization': f'Bearer {self.api_key}'}

        # requests applies no timeout by default; without one a stalled
        # connection would block the caller indefinitely.
        response = requests.get(
            url, params=params, headers=headers, timeout=self.timeout
        )
        response.raise_for_status()

        return pd.DataFrame(response.json()['campaigns'])

# Usage: pull the last 30 days of campaign data.
# Replace 'your_api_key' with a real key; fetch_campaigns issues a live
# HTTP request and raises if the response has an error status.
collector = AmazonAdDataCollector(api_key='your_api_key')
campaigns_df = collector.fetch_campaigns(days=30)

# Quick sanity check: row count plus the first few rows.
print(f"Collected {len(campaigns_df)} campaigns")
print(campaigns_df.head())

Step 2: Calculate Core Metrics

Amazon provides basic metrics, but we need to calculate derived ones:

def calculate_metrics(df):
    """Calculate ACoS, ROAS, CTR, CVR, CPC and AOV.

    Args:
        df: DataFrame with ``spend``, ``sales``, ``clicks``, ``impressions``
            and ``orders`` columns.

    Returns:
        A copy of ``df`` with ``acos``, ``roas``, ``ctr``, ``cvr``, ``cpc``
        and ``aov`` columns added. A metric whose denominator is zero is
        NaN (undefined) rather than ``inf``, so it cannot pass a threshold
        comparison or poison a downstream model.
    """
    df = df.copy()

    def ratio(numerator, denominator):
        # Mask zero denominators first so the division yields NaN, not inf.
        divisor = df[denominator]
        return df[numerator] / divisor.where(divisor != 0)

    # Advertising Cost of Sales
    df['acos'] = ratio('spend', 'sales')

    # Return on Ad Spend
    df['roas'] = ratio('sales', 'spend')

    # Click-Through Rate
    df['ctr'] = ratio('clicks', 'impressions')

    # Conversion Rate
    df['cvr'] = ratio('orders', 'clicks')

    # Cost Per Click
    df['cpc'] = ratio('spend', 'clicks')

    # Average Order Value
    df['aov'] = ratio('sales', 'orders')

    return df

# Rebind campaigns_df to the returned copy that carries the derived metric columns.
campaigns_df = calculate_metrics(campaigns_df)

Step 3: Four-Quadrant Keyword Analysis

Classify keywords by performance and cost:

import matplotlib.pyplot as plt
import seaborn as sns

def classify_keywords(df, roas_threshold=4.0, cpc_threshold=1.0):
    """Classify keywords into 4 quadrants by ROAS and CPC.

    Args:
        df: DataFrame with ``roas`` and ``cpc`` columns.
        roas_threshold: Rows with ROAS at or above this count as high
            performance.
        cpc_threshold: Rows with CPC at or above this count as high cost.

    Returns:
        A copy of ``df`` with a ``quadrant`` label column added; the input
        frame is left untouched. Rows whose ROAS or CPC is NaN fail the
        corresponding ``>=`` check and are treated as low performance or
        low cost respectively. An empty frame yields an empty frame that
        still has the ``quadrant`` column.
    """
    # Work on a copy so the caller's frame does not gain a column as a side effect.
    df = df.copy()

    high_perf = df['roas'] >= roas_threshold
    high_cost = df['cpc'] >= cpc_threshold

    # Start every row in Q3 (neither flag set) and overwrite the other cases.
    quadrant = pd.Series('Q3: Low Perf, Low Cost', index=df.index, dtype=object)
    quadrant.loc[high_perf & high_cost] = 'Q1: High Perf, High Cost'
    quadrant.loc[high_perf & ~high_cost] = 'Q2: High Perf, Low Cost'  # GOLD!
    quadrant.loc[~high_perf & high_cost] = 'Q4: Low Perf, High Cost'  # DANGER!

    # Assign positionally so duplicate index labels cannot trigger realignment.
    df['quadrant'] = quadrant.to_numpy()
    return df

# Visualize
def plot_keyword_quadrants(df, roas_threshold=4.0, cpc_threshold=1.0):
    """Draw a CPC-vs-ROAS bubble chart with the quadrant boundaries.

    Args:
        df: DataFrame with ``cpc``, ``roas``, ``spend`` and ``quadrant``
            columns.
        roas_threshold: Height of the horizontal guide line. Pass the same
            value used for classification so the lines match the colours.
        cpc_threshold: Position of the vertical guide line; same caveat.
    """
    plt.figure(figsize=(12, 8))

    plt.scatter(
        df['cpc'],
        df['roas'],
        s=df['spend'] * 2,  # Bubble size = spend
        # One integer code per distinct quadrant label drives the colour map.
        c=df['quadrant'].astype('category').cat.codes,
        alpha=0.6,
        cmap='viridis'
    )

    plt.axhline(y=roas_threshold, color='r', linestyle='--', label='ROAS threshold')
    plt.axvline(x=cpc_threshold, color='r', linestyle='--', label='CPC threshold')

    plt.xlabel('CPC ($)')
    plt.ylabel('ROAS')
    plt.title('Keyword Performance Quadrants')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

# Label every row with its quadrant, then draw the bubble chart.
# NOTE(review): the input is campaigns_df, so rows are classified at whatever
# granularity the API returned — confirm it is keyword-level data.
keywords_df = classify_keywords(campaigns_df)
plot_keyword_quadrants(keywords_df)

Step 4: Automated Bid Optimization

Use machine learning to predict optimal bids:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

class BidOptimizer:
    """ML-based bid optimization.

    Fits a random-forest regressor that maps keyword features to the CPC
    which would hit ``target_acos``.

    Args:
        target_acos: Desired Advertising Cost of Sales, as a fraction.
    """

    # Column order used for training; predictions are built in the same order.
    FEATURES = ('cvr', 'ctr', 'competition_level', 'hour_of_day')

    def __init__(self, target_acos=0.25):
        self.target_acos = target_acos
        self.model = RandomForestRegressor(n_estimators=100, random_state=42)

    def train(self, historical_data):
        """Train model on historical data and print the hold-out R² score.

        Args:
            historical_data: DataFrame containing every column in
                ``FEATURES`` plus ``aov``.
        """
        features = list(self.FEATURES)
        X = historical_data[features]

        # Calculate optimal bid based on target ACoS
        # ACoS = CPC / (CVR × AOV)
        # Therefore: CPC = ACoS × CVR × AOV
        y = self.target_acos * historical_data['cvr'] * historical_data['aov']

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        self.model.fit(X_train, y_train)

        score = self.model.score(X_test, y_test)
        print(f"Model R² score: {score:.4f}")

    def predict_bid(self, keyword_features):
        """Predict optimal bid for a keyword.

        Args:
            keyword_features: Either a dict keyed by the names in
                ``FEATURES`` (any key order), or a sequence of values
                already in ``FEATURES`` order.

        Returns:
            The model's predicted bid for that single keyword.

        Raises:
            KeyError: If a dict is missing one of the feature names.
        """
        features = list(self.FEATURES)

        # A dict cannot be fed to the model directly: pull its values out in
        # training column order so each number lands in the right feature.
        if isinstance(keyword_features, dict):
            values = [keyword_features[name] for name in features]
        else:
            values = list(keyword_features)

        # A one-row frame with the training column names keeps the input
        # shaped and labelled like the data the model was fitted on.
        row = pd.DataFrame([values], columns=features)
        return self.model.predict(row)[0]

# Usage
# NOTE(review): historical_data is not defined anywhere in this tutorial.
# train() reads the columns 'cvr', 'ctr', 'competition_level',
# 'hour_of_day' and 'aov' from it, so supply a DataFrame with those.
optimizer = BidOptimizer(target_acos=0.25)
optimizer.train(historical_data)

# Features for one keyword, keyed by the same names the model is trained on.
optimal_bid = optimizer.predict_bid({
    'cvr': 0.05,
    'ctr': 0.03,
    'competition_level': 0.7,
    'hour_of_day': 20
})

print(f"Suggested bid: ${optimal_bid:.2f}")

Step 5: Automated Negative Keyword Management

Automatically identify and negate wasteful keywords:

def identify_negative_keywords(search_terms_df, min_spend=50, max_cvr=0.01):
    """Select search terms that cost a lot yet rarely convert.

    A row qualifies when its spend is strictly above ``min_spend`` and its
    conversion rate is strictly below ``max_cvr``.

    Returns:
        The qualifying rows, restricted to the keyword, spend, clicks,
        orders and cvr columns.
    """
    report_columns = ['keyword', 'spend', 'clicks', 'orders', 'cvr']

    overspent = search_terms_df['spend'] > min_spend
    rarely_converts = search_terms_df['cvr'] < max_cvr

    return search_terms_df.loc[overspent & rarely_converts, report_columns]

def auto_negate_keywords(api_client, campaign_id, keywords_to_negate):
    """Automatically add keywords to negative list.

    Args:
        api_client: Object exposing
            ``add_negative_keyword(campaign_id=, keyword=, match_type=)``.
        campaign_id: Campaign that receives the negative keywords.
        keywords_to_negate: Iterable of keyword strings, or a DataFrame
            with a ``keyword`` column (such as a table of negation
            candidates).
    """
    # Iterating a DataFrame yields its column labels, not its rows, so a
    # candidate table must be reduced to its keyword column first.
    if hasattr(keywords_to_negate, 'columns'):
        keywords_to_negate = keywords_to_negate['keyword']

    for keyword in keywords_to_negate:
        api_client.add_negative_keyword(
            campaign_id=campaign_id,
            keyword=keyword,
            match_type='NEGATIVE_EXACT'
        )
        print(f"Negated: {keyword}")

# Usage
# NOTE(review): search_terms_df is not defined anywhere in this tutorial; it
# needs 'keyword', 'spend', 'clicks', 'orders' and 'cvr' columns.
negative_keywords = identify_negative_keywords(search_terms_df)
print(f"Found {len(negative_keywords)} keywords to negate")
print(negative_keywords)

Step 6: Real-Time Monitoring Dashboard

Build a simple dashboard with Plotly:

import plotly.graph_objects as go
from plotly.subplots import make_subplots

def create_dashboard(df):
    """Render a 2x2 interactive Plotly dashboard from ``df``.

    Panels: ACoS over date, ROAS over date, the ten rows with the highest
    sales as a bar chart, and spend share per campaign name as a pie chart.
    Reads the ``date``, ``acos``, ``roas``, ``sales``, ``keyword``,
    ``campaign_name`` and ``spend`` columns, then opens the figure.
    """
    panel_titles = ('ACoS Trend', 'ROAS Trend',
                    'Top Keywords', 'Spend Distribution')
    panel_specs = [
        [{'type': 'scatter'}, {'type': 'scatter'}],
        [{'type': 'bar'}, {'type': 'pie'}],
    ]
    dashboard = make_subplots(
        rows=2, cols=2,
        subplot_titles=panel_titles,
        specs=panel_specs,
    )

    best_sellers = df.nlargest(10, 'sales')

    # Each entry pairs a trace with the (row, col) cell it is drawn in;
    # the list order is the order in which traces are added.
    placed_traces = [
        (go.Scatter(x=df['date'], y=df['acos'], name='ACoS'), 1, 1),
        (go.Scatter(x=df['date'], y=df['roas'], name='ROAS'), 1, 2),
        (go.Bar(x=best_sellers['keyword'], y=best_sellers['sales']), 2, 1),
        (go.Pie(labels=df['campaign_name'], values=df['spend']), 2, 2),
    ]
    for trace, row, col in placed_traces:
        dashboard.add_trace(trace, row=row, col=col)

    dashboard.update_layout(height=800, showlegend=False, title_text="Ad Performance Dashboard")
    dashboard.show()

# create_dashboard reads 'date', 'acos', 'roas', 'sales', 'keyword',
# 'campaign_name' and 'spend'; campaigns_df must provide all of them.
create_dashboard(campaigns_df)

Step 7: Putting It All Together

Create a daily automation script:

import schedule
import time

def daily_optimization_job():
    """Run daily optimization.

    Fetches the last day of data, derives metrics, classifies rows into
    quadrants, then prints the bid increases (Q2) and pauses (Q4) it would
    make. The API calls that would apply the changes are left commented out,
    so this function only reports.

    NOTE(review): the loops read a 'keyword' column from the fetched data —
    confirm the campaigns endpoint returns keyword-level rows.
    """
    print(f"[{datetime.now()}] Starting daily optimization...")

    # 1. Collect data
    collector = AmazonAdDataCollector(api_key='your_api_key')
    campaigns = collector.fetch_campaigns(days=1)

    # 2. Calculate metrics
    campaigns = calculate_metrics(campaigns)

    # 3. Identify optimization opportunities
    campaigns = classify_keywords(campaigns)

    # Q2 keywords: increase bids
    to_scale = campaigns[campaigns['quadrant'] == 'Q2: High Perf, Low Cost']
    for _, kw in to_scale.iterrows():
        new_bid = kw['cpc'] * 1.2
        print(f"Increasing bid for '{kw['keyword']}': ${kw['cpc']:.2f} → ${new_bid:.2f}")
        # api_client.update_bid(kw['keyword_id'], new_bid)

    # Q4 keywords: decrease or pause
    to_pause = campaigns[campaigns['quadrant'] == 'Q4: Low Perf, High Cost']
    for _, kw in to_pause.iterrows():
        # Q4 is defined by low ROAS and high CPC, not by order count, so
        # report the measured figures instead of asserting zero orders.
        print(
            f"Pausing keyword '{kw['keyword']}' "
            f"(spent ${kw['spend']:.2f}, {kw['orders']} orders, ROAS {kw['roas']:.2f})"
        )
        # api_client.pause_keyword(kw['keyword_id'])

    # 4. Send daily report
    print(f"Optimized {len(to_scale)} keywords to scale, {len(to_pause)} to pause")

# Schedule to run daily at 2 AM
# NOTE(review): presumably "02:00" is interpreted in the machine's local time — confirm.
schedule.every().day.at("02:00").do(daily_optimization_job)

# Poll once a minute for due jobs; this loop never exits, so run it as a
# dedicated long-lived process.
while True:
    schedule.run_pending()
    time.sleep(60)

Results

After implementing this system for a baby products brand:

ACoS: 42% → 24% (-43%)
ROAS: 2.4 → 4.2 (+75%)
Team efficiency: 15 hours/week → 2 hours/week
Monthly profit: -$50K → +$80K

Key Takeaways

Automate data collection: Manual downloads are error-prone and time-consuming
Look beyond ACoS: ROAS, TACoS (Total ACoS: ad spend divided by total sales, organic included), and Profit ROAS give a complete picture
Negative keywords matter: They can instantly reduce ACoS by 10-20%
Machine learning helps: But start with simple rules before complex models
Monitor in real-time: Daily optimization beats weekly manual reviews

DEV Community