DEV Community

agenthustler
agenthustler

Posted on

How to Build a Newsletter Analytics Tracker with Python

Why Track Newsletter Metrics?

Newsletters are a booming business. Platforms like Substack, Beehiiv, and ConvertKit host thousands of creators earning serious revenue. But comparing newsletter performance across platforms is nearly impossible without scraping.

Let's build a tracker that monitors newsletter growth, engagement, and monetization signals.

What We Track

  • Subscriber counts (where public)
  • Posting frequency and consistency
  • Engagement signals (likes, comments, shares)
  • Pricing tiers for paid newsletters
  • Growth trajectory over time

Setup

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

Newsletter platforms use dynamic rendering. ScraperAPI handles JavaScript-heavy pages.

Scraping Substack Newsletters

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import datetime

SCRAPER_API_KEY = "YOUR_KEY"

def scrape_substack_profile(newsletter_slug):
    """Scrape a Substack newsletter's public profile page.

    Fetches https://<slug>.substack.com through ScraperAPI and parses the
    subscriber count (when public) plus recent-post metadata from the HTML.

    Args:
        newsletter_slug: The Substack subdomain, e.g. "platformer".

    Returns:
        dict with keys: newsletter, subscribers (int or None when the count
        is not rendered), post_count, posts (list of title/date/likes dicts),
        scraped_at (ISO timestamp).

    Raises:
        requests.HTTPError: if ScraperAPI responds with a non-2xx status.
        requests.Timeout: if the proxied request exceeds the timeout.
    """
    # Let requests build the query string so the target URL is
    # percent-encoded correctly (an f-string-concatenated query breaks
    # as soon as the target URL contains '&' or '?').
    response = requests.get(
        "http://api.scraperapi.com",
        params={
            "api_key": SCRAPER_API_KEY,
            "url": f"https://{newsletter_slug}.substack.com",
        },
        timeout=60,  # proxied pages are slow, but never hang forever
    )
    response.raise_for_status()  # fail loudly instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")

    # Subscriber count is only rendered for newsletters that expose it.
    subscribers = None
    sub_el = soup.select_one(".pub-subscriber-count, [class*=subscriber]")
    if sub_el:
        match = re.search(r"[\d,]+", sub_el.text)
        subscribers = int(match.group().replace(",", "")) if match else None

    posts = []
    for post in soup.select(".post-preview, article"):
        title = post.select_one("h2, .post-title")
        date = post.select_one("time, .post-date")
        likes = post.select_one(".like-count, [class*=heart]")
        posts.append({
            "title": title.text.strip() if title else None,
            # Prefer the machine-readable datetime attribute; fall back to
            # the element's visible text when the attribute is absent.
            "date": date.get("datetime", date.text.strip()) if date else None,
            "likes": extract_number(likes) if likes else 0,
        })

    return {
        "newsletter": newsletter_slug,
        "subscribers": subscribers,
        "post_count": len(posts),
        "posts": posts,
        "scraped_at": datetime.now().isoformat(),
    }

def extract_number(el):
    r"""Extract the first integer from an element's text content.

    Handles comma-grouped counts such as "1,234 likes" — the previous
    ``\d+`` pattern stopped at the first comma and returned 1 for that
    input, while the subscriber parsing elsewhere in this file already
    strips comma grouping. Keeps both code paths consistent.

    Args:
        el: A parsed element exposing a ``.text`` attribute, or None.

    Returns:
        The parsed integer, or 0 when *el* is None or contains no digits.
    """
    if not el:
        return 0
    match = re.search(r"[\d,]+", el.text)
    if not match:
        return 0
    # [\d,]+ can match a bare comma (e.g. in "a, b"); guard against an
    # empty digit string after stripping separators.
    digits = match.group().replace(",", "")
    return int(digits) if digits else 0
Enter fullscreen mode Exit fullscreen mode

Tracking Beehiiv Newsletters

def scrape_beehiiv_newsletter(domain):
    """Scrape a Beehiiv newsletter's public archive page.

    Args:
        domain: The newsletter's full domain, e.g. "www.example.com".

    Returns:
        dict with the domain, post_count, and a list of post title/date
        dicts. No subscriber count is reported — Beehiiv pages do not
        expose one publicly.

    Raises:
        requests.HTTPError: if ScraperAPI responds with a non-2xx status.
        requests.Timeout: if the proxied request exceeds the timeout.
    """
    # render=true asks ScraperAPI to execute JavaScript — Beehiiv pages are
    # client-rendered and come back empty without it. Passing the target URL
    # via params keeps it correctly percent-encoded.
    response = requests.get(
        "http://api.scraperapi.com",
        params={
            "api_key": SCRAPER_API_KEY,
            "url": f"https://{domain}",
            "render": "true",
        },
        timeout=90,  # JS rendering is slower than a plain fetch
    )
    response.raise_for_status()  # fail loudly instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")

    posts = []
    for article in soup.select("article, .post-item"):
        title = article.select_one("h2, h3")
        date = article.select_one("time")
        posts.append({
            "title": title.text.strip() if title else None,
            "date": date.text.strip() if date else None,
        })

    return {
        "newsletter": domain,
        "post_count": len(posts),
        "posts": posts,
    }
Enter fullscreen mode Exit fullscreen mode

Engagement Analysis

def analyze_engagement(newsletter_data):
    """Summarize engagement for one scraped newsletter.

    Args:
        newsletter_data: dict as returned by the scrapers; must contain
            "newsletter" and may contain "posts", where each post
            optionally carries a numeric "likes" value.

    Returns:
        {} when there are no posts; otherwise a dict with the newsletter
        name, total_posts, avg_likes, max_likes, and a coarse
        engagement_trend label ("growing" or "stable").
    """
    posts = newsletter_data.get("posts", [])
    if not posts:
        return {}

    df = pd.DataFrame(posts)
    # Beehiiv posts carry no like counts, so the column may be missing
    # entirely; Substack posts may carry None. Coerce everything to a
    # numeric series so mean()/max() cannot fail on object dtype.
    if "likes" not in df.columns:
        df["likes"] = 0
    df["likes"] = pd.to_numeric(df["likes"], errors="coerce").fillna(0)

    # Posts are scraped newest-first, so comparing the five most recent
    # posts against the overall mean gives a crude growth signal.
    # NOTE(review): this assumes the platform lists posts newest-first —
    # confirm against the scraper output ordering.
    recent_mean = df["likes"].iloc[:5].mean()
    overall_mean = df["likes"].mean()

    return {
        "newsletter": newsletter_data["newsletter"],
        "total_posts": len(df),
        "avg_likes": overall_mean,
        "max_likes": df["likes"].max(),
        "engagement_trend": "growing" if recent_mean > overall_mean else "stable",
    }
Enter fullscreen mode Exit fullscreen mode

Growth Tracking Over Time

import json
import os

HISTORY_FILE = "newsletter_history.json"


def track_growth(newsletter_slug, current_data):
    """Append a snapshot to the JSON history file and report growth.

    Args:
        newsletter_slug: Key under which this newsletter's snapshots
            are stored in the history file.
        current_data: dict with (optionally) "subscribers" and
            "post_count" for the current scrape.

    Returns:
        The percentage subscriber growth since the previous snapshot,
        or None when there is no previous snapshot (or the previous
        subscriber count was unknown/zero).
    """
    history = {}
    if os.path.exists(HISTORY_FILE):
        with open(HISTORY_FILE) as f:
            try:
                history = json.load(f)
            except json.JSONDecodeError:
                # A corrupt or half-written history file should not kill
                # the whole run; start a fresh history instead.
                history = {}

    history.setdefault(newsletter_slug, []).append({
        "date": datetime.now().isoformat(),
        "subscribers": current_data.get("subscribers"),
        "post_count": current_data.get("post_count"),
    })

    with open(HISTORY_FILE, "w") as f:
        json.dump(history, f, indent=2)

    snapshots = history[newsletter_slug]
    if len(snapshots) >= 2:
        # "or 0" normalizes None (count not public) to 0 so the
        # comparison below cannot raise.
        prev = snapshots[-2].get("subscribers") or 0
        curr = current_data.get("subscribers") or 0
        if prev > 0:
            growth = ((curr - prev) / prev) * 100
            print(f"Growth since last check: {growth:.1f}%")
            return growth
    return None
Enter fullscreen mode Exit fullscreen mode

Batch Monitoring

def monitor_newsletters(slugs):
    """Scrape, analyze, and persist growth for a list of Substack slugs.

    Failures for individual newsletters are logged and skipped so one
    bad slug cannot abort the whole batch.

    Args:
        slugs: iterable of Substack subdomains.

    Returns:
        A DataFrame with one row per successfully scraped newsletter
        (possibly empty).
    """
    results = []
    for slug in slugs:
        try:
            data = scrape_substack_profile(slug)
            engagement = analyze_engagement(data)
            track_growth(slug, data)
            results.append({**data, **engagement})
        except Exception as e:  # broad on purpose: batch job, skip and continue
            print(f"Error scraping {slug}: {e}")

    df = pd.DataFrame(results)
    if df.empty:
        # Original code raised KeyError here when every slug failed.
        print("No newsletters scraped successfully.")
        return df
    # analyze_engagement() returns {} for newsletters with no posts, so
    # summary columns may be absent; print only the ones that exist.
    summary_cols = [
        c for c in ("newsletter", "subscribers", "total_posts", "avg_likes")
        if c in df.columns
    ]
    print(df[summary_cols].to_string())
    return df

if __name__ == "__main__":
    # Guard the demo run so importing this module does not trigger
    # network scraping as a side effect.
    newsletters = ["platformer", "stratechery", "thegeneralist", "lennysnewsletter"]
    monitor_newsletters(newsletters)
Enter fullscreen mode Exit fullscreen mode

Proxy Tips

Newsletter platforms detect scrapers. Use ThorData residential proxies for reliable access. Track success rates with ScrapeOps.

Conclusion

Newsletter analytics scraping reveals growth patterns invisible from the outside. With ScraperAPI and Python, you can build competitive intelligence for the creator economy.

Top comments (0)