Scraping Twitch: Stream Data, Viewers, and Clip Analytics
Twitch dominates live streaming with 140+ million monthly active users. Whether building analytics dashboards or tracking content trends, Twitch data is incredibly valuable.
Setting Up the Data Collector
# Implementation is proprietary (that IS the moat).
# Skip the build — use our ready-made Apify actor:
# see the CTA below for the link (fpr=yw6md3).
Collecting via Helix API
def get_streams(self, game_id=None, lang='en', first=100):
    """Fetch currently-live streams from the Helix API and persist them.

    Args:
        game_id: optional Twitch category id; when falsy, no game filter
            is sent (matches the original behavior).
        lang: stream-language filter passed to the API (default 'en').
        first: page size requested from the API.

    Returns:
        The list of stream dicts from the response's 'data' field
        (empty list when the key is absent).
    """
    params = {'first': first, 'language': lang}
    if game_id:
        params['game_id'] = game_id
    resp = self.api.get('https://api.twitch.tv/helix/streams', params=params)
    streams = resp.json().get('data', [])
    # Batch insert: one executemany round-trip instead of one execute per row.
    self.db.executemany(
        'INSERT INTO streams (channel,game,title,viewers) VALUES (?,?,?,?)',
        [(s['user_name'], s['game_name'], s['title'], s['viewer_count'])
         for s in streams])
    self.db.commit()
    return streams
def get_clips(self, broadcaster_id, first=20):
    """Fetch a broadcaster's clips from the Helix API and upsert them.

    Args:
        broadcaster_id: Twitch broadcaster id to fetch clips for.
        first: page size requested from the API.

    Returns:
        The list of clip dicts from the response's 'data' field.
    """
    resp = self.api.get('https://api.twitch.tv/helix/clips',
                        params={'broadcaster_id': broadcaster_id, 'first': first})
    clips = resp.json().get('data', [])
    # Batch upsert: INSERT OR REPLACE keyed on the clip id de-duplicates
    # clips seen on earlier polls. NOTE(review): the `game` column stores
    # c['game_id'] (an id), while the streams insert stores game *names* —
    # confirm this asymmetry is intentional.
    self.db.executemany('''INSERT OR REPLACE INTO clips
        (id,channel,title,views,duration,created_at,game)
        VALUES (?,?,?,?,?,?,?)''',
        [(c['id'], c['broadcaster_name'], c['title'],
          c['view_count'], c['duration'], c['created_at'], c['game_id'])
         for c in clips])
    self.db.commit()
    return clips
Scraping Category Trends
# Implementation is proprietary (that IS the moat).
# Skip the build — use our ready-made Apify actor:
# see the CTA below for the link (fpr=yw6md3).
Analytics Reports
class TwitchAnalytics:
    """Read-only reporting queries over the collected `streams` samples.

    Expects a DB-API connection whose `streams` table has at least the
    columns channel, game, viewers, and timestamp (populated by the
    collector; timestamp presumably defaults to the insert time — verify
    against the schema).
    """

    def __init__(self, db):
        self.db = db

    def growth_report(self, channel, days=30):
        """Per-day viewer stats for one channel over the trailing window.

        Args:
            channel: channel name as stored in `streams.channel`.
            days: size of the trailing window in days.

        Returns:
            A list of dicts (date/avg/peak/samples), ordered by date.
        """
        cursor = self.db.execute('''
            SELECT DATE(timestamp) as d, AVG(viewers), MAX(viewers), COUNT(*)
            FROM streams WHERE channel=? AND timestamp > datetime('now',?)
            GROUP BY d ORDER BY d
        ''', (channel, f'-{days} days'))
        # `or 0`: AVG is NULL when every viewers value in a group is NULL,
        # and round(None) would raise TypeError.
        return [{'date': r[0], 'avg': round(r[1] or 0), 'peak': r[2], 'samples': r[3]}
                for r in cursor.fetchall()]

    def trending(self, hours=6):
        """Top 20 games by summed viewers over the trailing window.

        Args:
            hours: size of the trailing window in hours.

        Returns:
            A list of dicts (game/total/streams/avg), highest total first.
        """
        cursor = self.db.execute('''
            SELECT game, SUM(viewers), COUNT(DISTINCT channel), AVG(viewers)
            FROM streams WHERE timestamp > datetime('now',?)
            GROUP BY game ORDER BY SUM(viewers) DESC LIMIT 20
        ''', (f'-{hours} hours',))
        # Same NULL-average guard as growth_report.
        return [{'game': r[0], 'total': r[1], 'streams': r[2], 'avg': round(r[3] or 0)}
                for r in cursor.fetchall()]
Continuous Monitoring
def monitor(collector, channels, interval=300, iterations=None):
    """Poll Twitch on a fixed interval and print summary stats each cycle.

    Args:
        collector: object exposing get_streams()/get_clips(ch) and a `db`
            attribute usable by TwitchAnalytics.
        channels: iterable of broadcaster ids to pull clips for.
        interval: seconds to sleep between polling cycles.
        iterations: optional cap on the number of cycles; None (the
            default) loops forever, preserving the original behavior.
    """
    analytics = TwitchAnalytics(collector.db)
    cycles = 0
    while iterations is None or cycles < iterations:
        streams = collector.get_streams()
        print(f"[{datetime.now()}] Tracked {len(streams)} streams")
        for ch in channels:
            clips = collector.get_clips(ch)
            print(f" {ch}: {len(clips)} clips")
        trending = analytics.trending()
        print("Top:", [t['game'] for t in trending[:5]])
        cycles += 1
        # No trailing sleep once a finite iteration budget is exhausted.
        if iterations is None or cycles < iterations:
            time.sleep(interval)
To scale to thousands of channels, a rendering service such as ScraperAPI can handle JavaScript-heavy pages, ThorData can supply residential proxies, and ScrapeOps can monitor your scrapers' performance.
Follow for more Python scraping and analytics tutorials.
Top comments (0)