Why Track Newsletter Metrics?
Newsletters are a booming business. Platforms like Substack, Beehiiv, and ConvertKit host thousands of creators earning serious revenue. But comparing newsletter performance across platforms is nearly impossible without scraping.
Let's build a tracker that monitors newsletter growth, engagement, and monetization signals.
What We Track
- Subscriber counts (where public)
- Posting frequency and consistency
- Engagement signals (likes, comments, shares)
- Pricing tiers for paid newsletters
- Growth trajectory over time
Setup
pip install requests beautifulsoup4 pandas
Newsletter platforms use dynamic rendering. ScraperAPI handles JavaScript-heavy pages.
Scraping Substack Newsletters
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import datetime
SCRAPER_API_KEY = "YOUR_KEY"
def scrape_substack_profile(newsletter_slug):
    """Scrape a Substack newsletter's public homepage via ScraperAPI.

    Args:
        newsletter_slug: The newsletter's subdomain, e.g. "platformer".

    Returns:
        dict with keys: newsletter, subscribers (int or None),
        post_count, posts (list of {title, date, likes}),
        scraped_at (ISO-8601 timestamp).

    Raises:
        requests.HTTPError: if ScraperAPI returns a non-2xx status.
        requests.Timeout: if the proxied request exceeds the timeout.
    """
    url = (
        f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}"
        f"&url=https://{newsletter_slug}.substack.com"
    )
    # Timeout prevents hanging forever on a stalled proxy; raise_for_status
    # surfaces proxy/ban errors instead of silently parsing an error page.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Subscriber counts are only shown on some newsletters; the selector is
    # intentionally loose because Substack's class names change frequently.
    subscribers = None
    sub_el = soup.select_one(".pub-subscriber-count, [class*=subscriber]")
    if sub_el:
        match = re.search(r"[\d,]+", sub_el.text)
        subscribers = int(match.group().replace(",", "")) if match else None

    posts = []
    for post in soup.select(".post-preview, article"):
        title = post.select_one("h2, .post-title")
        date = post.select_one("time, .post-date")
        likes = post.select_one(".like-count, [class*=heart]")
        posts.append({
            "title": title.text.strip() if title else None,
            # Prefer the machine-readable datetime attribute when present.
            "date": date.get("datetime", date.text.strip()) if date else None,
            "likes": extract_number(likes) if likes else 0,
        })

    return {
        "newsletter": newsletter_slug,
        "subscribers": subscribers,
        "post_count": len(posts),
        "posts": posts,
        "scraped_at": datetime.now().isoformat(),
    }
def extract_number(el):
    """Extract the first integer from an element's visible text.

    Handles thousands separators ("1,234" -> 1234) so like counts parse
    the same way as the subscriber totals in scrape_substack_profile
    (previously r"\\d+" stopped at the first comma, turning 1,234 into 1).

    Args:
        el: A BeautifulSoup element (anything with a ``.text`` attribute),
            or None.

    Returns:
        The parsed int, or 0 when el is None or contains no digits.
    """
    if not el:
        return 0
    # Must start with a digit so a stray comma alone never matches.
    match = re.search(r"\d[\d,]*", el.text)
    return int(match.group().replace(",", "")) if match else 0
Tracking Beehiiv Newsletters
def scrape_beehiiv_newsletter(domain):
    """Scrape post listings from a Beehiiv newsletter's homepage.

    Args:
        domain: The newsletter's custom domain, e.g. "news.example.com".

    Returns:
        dict with keys: newsletter, post_count, posts
        (list of {title, date}).

    Raises:
        requests.HTTPError: if ScraperAPI returns a non-2xx status.
        requests.Timeout: if the proxied request exceeds the timeout.
    """
    # render=true: Beehiiv pages are JavaScript-rendered, so ScraperAPI must
    # execute JS before returning HTML. Longer timeout accounts for rendering.
    url = (
        f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}"
        f"&url=https://{domain}&render=true"
    )
    response = requests.get(url, timeout=90)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    posts = []
    for article in soup.select("article, .post-item"):
        title = article.select_one("h2, h3")
        date = article.select_one("time")
        posts.append({
            "title": title.text.strip() if title else None,
            "date": date.text.strip() if date else None,
        })

    return {
        "newsletter": domain,
        "post_count": len(posts),
        "posts": posts,
    }
Engagement Analysis
def analyze_engagement(newsletter_data):
    """Summarize like-based engagement for one scraped newsletter.

    Args:
        newsletter_data: dict as produced by scrape_substack_profile,
            containing a "posts" list (each post may carry a "likes" count).

    Returns:
        Summary dict (newsletter, total_posts, avg_likes, max_likes,
        engagement_trend), or {} when there are no posts.
    """
    post_list = newsletter_data.get("posts", [])
    if not post_list:
        return {}

    frame = pd.DataFrame(post_list)
    # Fall back to an all-zero column when no post carried a "likes" key.
    frame["likes"] = frame.get("likes", pd.Series([0] * len(frame))).fillna(0)

    overall_avg = frame["likes"].mean()
    # Compare the 5 most recent posts against the overall average;
    # assumes the scrape order is newest-first — TODO confirm.
    recent_avg = frame["likes"].iloc[:5].mean()
    trend = "growing" if recent_avg > overall_avg else "stable"

    return {
        "newsletter": newsletter_data["newsletter"],
        "total_posts": len(frame),
        "avg_likes": overall_avg,
        "max_likes": frame["likes"].max(),
        "engagement_trend": trend,
    }
Growth Tracking Over Time
import json
import os
# Path of the JSON file storing historical snapshots, keyed by newsletter slug.
HISTORY_FILE = "newsletter_history.json"


def track_growth(newsletter_slug, current_data):
    """Append a snapshot to the history file and report subscriber growth.

    Persists {date, subscribers, post_count} per newsletter into
    HISTORY_FILE (JSON), then compares the latest two snapshots.

    Args:
        newsletter_slug: Key identifying the newsletter in the history file.
        current_data: dict with optional "subscribers" and "post_count".

    Returns:
        Percentage growth in subscribers since the previous snapshot
        (also printed for interactive use), or None when there is no
        previous snapshot or no usable subscriber baseline.
    """
    history = {}
    if os.path.exists(HISTORY_FILE):
        with open(HISTORY_FILE, encoding="utf-8") as f:
            history = json.load(f)

    snapshots = history.setdefault(newsletter_slug, [])
    snapshots.append({
        "date": datetime.now().isoformat(),
        "subscribers": current_data.get("subscribers"),
        "post_count": current_data.get("post_count"),
    })

    with open(HISTORY_FILE, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=2)

    if len(snapshots) >= 2:
        # "or 0" guards against snapshots where subscribers was None
        # (count not publicly visible at scrape time).
        prev = snapshots[-2].get("subscribers", 0) or 0
        curr = current_data.get("subscribers", 0) or 0
        if prev > 0:
            growth = ((curr - prev) / prev) * 100
            print(f"Growth since last check: {growth:.1f}%")
            return growth
    return None
Batch Monitoring
def monitor_newsletters(slugs):
    """Scrape, analyze, and growth-track a batch of Substack newsletters.

    Args:
        slugs: Iterable of Substack subdomains to monitor.

    Returns:
        pandas.DataFrame of combined scrape + engagement results
        (empty DataFrame when every slug failed).
    """
    results = []
    for slug in slugs:
        try:
            data = scrape_substack_profile(slug)
            engagement = analyze_engagement(data)
            track_growth(slug, data)
            results.append({**data, **engagement})
        except Exception as e:
            # Best-effort batch: one failing newsletter shouldn't stop the rest.
            print(f"Error scraping {slug}: {e}")

    df = pd.DataFrame(results)
    if not df.empty:
        # reindex tolerates missing columns (e.g. engagement was empty for
        # every newsletter), whereas df[[...]] would raise KeyError.
        cols = ["newsletter", "subscribers", "total_posts", "avg_likes"]
        print(df.reindex(columns=cols).to_string())
    return df
if __name__ == "__main__":
    # Guard so importing this module doesn't fire off network requests.
    newsletters = ["platformer", "stratechery", "thegeneralist", "lennysnewsletter"]
    monitor_newsletters(newsletters)
Proxy Tips
Newsletter platforms detect scrapers. Use ThorData residential proxies for reliable access. Track success rates with ScrapeOps.
Conclusion
Newsletter analytics scraping reveals growth patterns invisible from the outside. With ScraperAPI and Python, you can build competitive intelligence for the creator economy.
Top comments (0)