Podcast analytics are notoriously opaque. Platforms guard their data closely. But by scraping public charts from Spotify, Apple Podcasts, and Chartable, you can build a comprehensive picture of what's trending in audio content.
What We'll Track
- Top podcast rankings by category
- Chart movement and velocity
- Cross-platform performance comparisons
- Genre and category trends
Setup
pip install requests beautifulsoup4 pandas matplotlib
Apple Podcasts Chart Scraper
Apple Podcasts exposes chart data via a public RSS-like endpoint:
import requests
import time
from datetime import datetime
def scrape_apple_podcast_charts(genre_id=26, country="us", limit=100):
    """Scrape Apple Podcasts top charts.

    Genre IDs: 26=All, 1301=Arts, 1303=Comedy, 1304=Education,
    1307=Health, 1309=TV, 1311=Music, 1314=Religion,
    1318=Technology, 1321=Business, 1324=Society, 1325=News

    Args:
        genre_id: Genre ID interpolated into the feed URL.
        country: Country code interpolated into the feed URL and copied
            onto every returned row.
        limit: Chart size interpolated into the feed URL.

    Returns:
        A list of dicts, one per entry in ``feed.results``, with the keys
        ``rank``, ``name``, ``artist``, ``apple_id``, ``url``, ``genre``,
        ``platform``, ``country`` and ``scraped_at``. ``rank`` is the
        1-based position of the entry in the feed.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    url = (
        f"https://rss.applemarketingtools.com/api/v2/{country}"
        f"/podcasts/top/{limit}/genre={genre_id}/json"
    )
    response = requests.get(url, timeout=15)
    # Surface HTTP failures instead of parsing an error body and silently
    # returning an empty chart.
    response.raise_for_status()
    data = response.json()

    # One timestamp for the whole snapshot so its rows can be grouped later.
    scraped_at = datetime.now().isoformat()

    podcasts = []
    for rank, result in enumerate(data.get("feed", {}).get("results", []), 1):
        # A missing or empty "genres" list falls back to an empty genre name.
        genres = result.get("genres") or [{}]
        podcasts.append({
            "rank": rank,
            "name": result.get("name", ""),
            "artist": result.get("artistName", ""),
            "apple_id": result.get("id", ""),
            "url": result.get("url", ""),
            "genre": genres[0].get("name", ""),
            "platform": "apple_podcasts",
            "country": country,
            "scraped_at": scraped_at,
        })
    return podcasts
Spotify Podcast Charts
from bs4 import BeautifulSoup
# Placeholder ScraperAPI key interpolated into the proxy requests below;
# replace "YOUR_KEY" with a real key before running the scrapers.
SCRAPER_API_KEY = "YOUR_KEY"
def scrape_spotify_podcast_charts(country="us"):
    """Scrape the Spotify podcast charts page through ScraperAPI.

    Args:
        country: Country code copied onto every returned row. It is not
            sent with the request, so it only labels the data.

    Returns:
        A list of dicts with the keys ``rank``, ``name``, ``artist``,
        ``platform``, ``country`` and ``scraped_at``. ``rank`` is the
        1-based position of the matching chart element on the page; rows
        without a title element are skipped but still consume a rank.

    Raises:
        requests.HTTPError: If the proxy answers with a 4xx/5xx status.
    """
    # Pass the target URL via ``params`` so it is percent-encoded rather than
    # spliced raw into the query string, and use HTTPS so the API key is not
    # sent in cleartext.
    response = requests.get(
        "https://api.scraperapi.com/",
        params={
            "api_key": SCRAPER_API_KEY,
            "url": "https://podcastcharts.byspotify.com/",
            "render": "true",
        },
        timeout=60,
    )
    # Do not try to parse an error page as a chart.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # One timestamp for the whole snapshot so its rows can be grouped later.
    scraped_at = datetime.now().isoformat()

    podcasts = []
    rows = soup.select(".chart-item, [data-testid='chart-row']")
    for rank, item in enumerate(rows, 1):
        title = item.select_one(".show-name, .chart-title")
        if title is None:
            continue
        publisher = item.select_one(".publisher, .chart-subtitle")
        podcasts.append({
            "rank": rank,
            "name": title.text.strip(),
            "artist": publisher.text.strip() if publisher else "",
            "platform": "spotify",
            "country": country,
            "scraped_at": scraped_at,
        })
    return podcasts
Chartable Data
def scrape_chartable(category="overall", country="us"):
    """Scrape Chartable podcast rankings through ScraperAPI.

    Args:
        category: Chart category interpolated into the Chartable URL and
            copied onto every returned row.
        country: Country code interpolated into the Chartable URL and
            copied onto every returned row.

    Returns:
        A list of dicts with the keys ``rank``, ``name``, ``movement``,
        ``platform``, ``category``, ``country`` and ``scraped_at``.
        ``rank`` is 0 when the rank cell is empty or does not start with an
        integer; ``movement`` is ``"--"`` when no movement element is found.

    Raises:
        requests.HTTPError: If the proxy answers with a 4xx/5xx status.
    """
    url = f"https://chartable.com/charts/itunes/{country}-{category}-podcasts"
    # Pass the target URL via ``params`` so it is percent-encoded rather than
    # spliced raw into the query string, and use HTTPS so the API key is not
    # sent in cleartext.
    response = requests.get(
        "https://api.scraperapi.com/",
        params={"api_key": SCRAPER_API_KEY, "url": url, "render": "true"},
        timeout=60,
    )
    # Do not try to parse an error page as a chart.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # One timestamp for the whole snapshot so its rows can be grouped later.
    scraped_at = datetime.now().isoformat()

    podcasts = []
    for row in soup.select("table tbody tr"):
        cols = row.select("td")
        if len(cols) < 3:
            continue
        name_el = cols[1].select_one("a")
        if name_el is None:
            continue

        rank_cell = cols[0]
        movement = rank_cell.select_one(".movement")

        # The rank is the first whitespace-separated token of the first cell.
        # A non-numeric token must not abort the whole scrape, so it maps to
        # the same 0 used for an empty cell.
        rank_tokens = rank_cell.text.split()
        try:
            rank = int(rank_tokens[0]) if rank_tokens else 0
        except ValueError:
            rank = 0

        podcasts.append({
            "rank": rank,
            "name": name_el.text.strip(),
            "movement": movement.text.strip() if movement else "--",
            "platform": "chartable",
            "category": category,
            "country": country,
            "scraped_at": scraped_at,
        })
    return podcasts
Cross-Platform Analysis
import pandas as pd
def collect_all_charts():
    """Gather chart rows from Apple Podcasts, Spotify and Chartable.

    Runs the three scrapers one after another, pausing between requests,
    and returns every row they produced in a single flat list.
    """
    apple_genres = {26: "All", 1318: "Technology", 1321: "Business", 1325: "News"}
    chartable_categories = ["overall", "technology", "business"]
    collected = []

    # Apple Podcasts: one request per genre, 2 s apart.
    for gid in apple_genres:
        print(f"Apple Podcasts: {apple_genres[gid]}")
        collected += scrape_apple_podcast_charts(genre_id=gid)
        time.sleep(2)

    # Spotify: a single chart page.
    print("Spotify charts")
    collected += scrape_spotify_podcast_charts()
    time.sleep(3)

    # Chartable: one request per category, 3 s apart.
    for cat in chartable_categories:
        print(f"Chartable: {cat}")
        collected += scrape_chartable(cat)
        time.sleep(3)

    return collected
def analyze_cross_platform(data):
    """Print a cross-platform summary of collected chart rows.

    Args:
        data: Iterable of row dicts. Rows need the keys ``name``,
            ``platform`` and ``rank``; ``genre`` is optional.

    Prints the number of podcast names that appear on more than one
    platform, up to 15 of them with their per-platform ranks, and (when a
    ``genre`` column exists) the ten most frequent non-empty genres.
    Returns ``None``.
    """
    df = pd.DataFrame(data)
    print("=== Cross-Platform Podcast Rankings ===\n")

    # An empty input produces a frame without columns, which would make the
    # groupby below raise KeyError; report and stop instead.
    if df.empty or not {"name", "platform", "rank"}.issubset(df.columns):
        print("No chart data to analyze.")
        return

    # Names charting on at least two distinct platforms.
    name_counts = df.groupby("name")["platform"].nunique()
    multi_platform = name_counts[name_counts > 1].index
    print(f"Podcasts on multiple platforms: {len(multi_platform)}\n")
    for name in list(multi_platform)[:15]:
        entries = df[df["name"] == name][["platform", "rank"]].to_dict("records")
        platforms = ", ".join(f"{e['platform']} #{e['rank']}" for e in entries)
        print(f" {name}: {platforms}")

    print("\n=== By Genre/Category ===\n")
    if "genre" in df.columns:
        # Rows without a genre value are NaN and are dropped by groupby.
        genre_counts = df[df["genre"] != ""].groupby("genre").size()
        print(genre_counts.sort_values(ascending=False).head(10))
# Run the collection and report only when executed as a script, so that
# importing this module does not trigger network scraping.
if __name__ == "__main__":
    data = collect_all_charts()
    analyze_cross_platform(data)
Tracking Changes Over Time
import sqlite3
def track_chart_movement(db_path="podcast_charts.db", data=None):
    """Store a chart snapshot in SQLite and print the table size.

    Args:
        db_path: Path of the SQLite database file; the ``chart_history``
            table is created on first use.
        data: Optional iterable of row dicts to store. Each row needs the
            keys ``name``, ``rank``, ``platform`` and ``scraped_at``;
            ``category`` defaults to ``""`` and ``country`` to ``"us"``.
            When omitted, a fresh snapshot is fetched with
            ``collect_all_charts()``.

    Rows that collide with the table's
    ``UNIQUE(name, platform, category, scraped_at)`` constraint are skipped.
    Prints the total number of rows in ``chart_history`` and returns ``None``.
    """
    conn = sqlite3.connect(db_path)
    # try/finally so the connection is released even if scraping or an
    # insert raises.
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS chart_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT, rank INTEGER, platform TEXT,
                category TEXT, country TEXT, scraped_at TEXT,
                UNIQUE(name, platform, category, scraped_at)
            )
        """)

        if data is None:
            data = collect_all_charts()

        # INSERT OR IGNORE skips duplicate rows declaratively instead of
        # catching and discarding IntegrityError per row.
        conn.executemany(
            """INSERT OR IGNORE INTO chart_history
               (name, rank, platform, category, country, scraped_at)
               VALUES (?, ?, ?, ?, ?, ?)""",
            [
                (
                    item["name"],
                    item["rank"],
                    item["platform"],
                    item.get("category", ""),
                    item.get("country", "us"),
                    item["scraped_at"],
                )
                for item in data
            ],
        )
        conn.commit()

        # Count in SQL rather than loading the whole table just to measure it.
        total = conn.execute("SELECT COUNT(*) FROM chart_history").fetchone()[0]
        print(f"\nTotal chart entries tracked: {total}")
    finally:
        conn.close()
Proxy Strategy
Apple Podcasts RSS endpoints are open and don't need proxies, but the Spotify and Chartable chart pages do: the scrapers above fetch them through ScraperAPI with JavaScript rendering enabled (`render=true`), which handles both platforms well. For geo-specific charts, ThorData residential proxies let you scrape from different countries. Monitor pipeline health with ScrapeOps.
Conclusion
Podcast chart data reveals cultural trends, advertising opportunities, and content strategy insights. With cross-platform tracking, you see the full picture that no single platform gives you on its own.
Top comments (0)