How to Build a Trend Forecasting Tool with Social Scraping
Trends emerge on social media before they hit the mainstream. By scraping platforms systematically, you can detect rising trends days before they become obvious.
Data Collection
# Implementation is proprietary (that IS the moat).
# Skip the build — use our ready-made Apify actor:
# see the CTA below for the link (fpr=yw6md3).
Velocity Engine
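The key insight: acceleration matters more than volume. The engine below measures it as the relative change in mentions (or total engagement) between the most recent time window and the window immediately before it.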
class VelocityEngine:
    """Measure how fast a keyword is growing between two adjacent time windows.

    ``db`` is any object exposing ``execute(sql, params)`` that returns a
    cursor with ``fetchone()`` (for example a ``sqlite3.Connection``).
    Queries read the ``mentions`` table and its ``keyword``, ``timestamp``
    and ``engagement`` columns.

    NOTE(review): window bounds are passed as ``datetime.isoformat()`` strings,
    so the comparisons presumably rely on ``timestamp`` being stored in that
    same ISO-8601 text format (``T`` separator, naive local time) -- confirm
    against the collector that writes the table.
    """

    def __init__(self, db):
        self.db = db

    def velocity(self, keyword, hours=24, now=None):
        """Return the relative change in mention count versus the prior window.

        Args:
            keyword: Keyword to look up in ``mentions``.
            hours: Width of each of the two windows, in hours.
            now: End of the current window. Defaults to ``datetime.now()``;
                pass an explicit value for reproducible results.

        Returns:
            ``(current - previous) / previous`` rounded to 4 decimals.
            When the previous window is empty: ``float('inf')`` if the
            current window has mentions, otherwise ``0``.
        """
        return self._growth(self._count, keyword, hours, now)

    def engagement_velocity(self, keyword, hours=24, now=None):
        """Return the relative change in summed engagement versus the prior window.

        Takes the same arguments and follows the same return rules as
        ``velocity``, but compares ``SUM(engagement)`` instead of row counts.
        """
        return self._growth(self._engagement, keyword, hours, now)

    def _growth(self, measure, keyword, hours, now):
        """Apply ``measure`` to the current and previous windows and compare them."""
        if now is None:
            now = datetime.now()
        span = timedelta(hours=hours)
        cur_start = now - span
        prev_start = cur_start - span
        cur = measure(keyword, cur_start, now)
        prev = measure(keyword, prev_start, cur_start)
        if prev == 0:
            # No baseline to divide by: report unbounded growth, or no change.
            return float('inf') if cur > 0 else 0
        return round((cur - prev) / prev, 4)

    def _count(self, kw, start, end):
        """Count mentions of ``kw`` with ``start <= timestamp < end``."""
        # Half-open interval: a row stamped exactly on the boundary between
        # the two windows must belong to only one of them (BETWEEN would
        # count it in both).
        c = self.db.execute(
            'SELECT COUNT(*) FROM mentions '
            'WHERE keyword=? AND timestamp >= ? AND timestamp < ?',
            (kw, start.isoformat(), end.isoformat()))
        return c.fetchone()[0]

    def _engagement(self, kw, start, end):
        """Sum ``engagement`` for ``kw`` with ``start <= timestamp < end`` (0 if none)."""
        c = self.db.execute(
            'SELECT COALESCE(SUM(engagement),0) FROM mentions '
            'WHERE keyword=? AND timestamp >= ? AND timestamp < ?',
            (kw, start.isoformat(), end.isoformat()))
        return c.fetchone()[0]
Breakout Detection
class BreakoutDetector:
    """Rank keywords by a blended mention/engagement velocity score.

    ``engine`` must provide ``velocity(keyword)`` and
    ``engagement_velocity(keyword)``.
    """

    def __init__(self, engine):
        self.engine = engine

    def detect(self, keywords, threshold=0.5):
        """Return the keywords whose score exceeds ``threshold``, best first.

        The score is ``0.4 * mention velocity + 0.6 * engagement velocity``.
        Each result is a dict with ``keyword``, ``mention_vel``,
        ``engagement_vel`` and ``score`` (rounded to 3 decimals).
        """
        breakouts = []
        for keyword in keywords:
            mention_vel = self.engine.velocity(keyword)
            engagement_vel = self.engine.engagement_velocity(keyword)
            blended = (mention_vel * 0.4) + (engagement_vel * 0.6)
            if not blended > threshold:
                continue
            entry = {
                'keyword': keyword,
                'mention_vel': mention_vel,
                'engagement_vel': engagement_vel,
                'score': round(blended, 3),
            }
            breakouts.append(entry)
        # Stable sort: keywords with equal scores keep their input order.
        breakouts.sort(key=lambda entry: entry['score'], reverse=True)
        return breakouts
Trend Forecasting
class Forecaster:
    """Classify a keyword's recent daily mention trend."""

    def predict(self, keyword, db, points=7):
        """Label the trend of ``keyword`` from its most recent daily counts.

        Reads up to ``points`` of the latest days that have mentions, fits a
        least-squares line through the daily counts (x = position in the
        series, oldest first) and divides the slope by the mean daily count.

        Returns:
            ``{'prediction': 'insufficient_data'}`` when fewer than 3 days
            are available; otherwise a dict with ``keyword``, ``prediction``
            (``'accelerating'`` above 0.15, ``'growing'`` above 0.05,
            ``'stable'`` above -0.05, else ``'declining'``) and
            ``growth_rate`` rounded to 4 decimals.
        """
        # The SQL text is kept verbatim, hence the unindented lines.
        rows = db.execute('''
SELECT DATE(timestamp), COUNT(*) FROM mentions
WHERE keyword=? GROUP BY DATE(timestamp)
ORDER BY DATE(timestamp) DESC LIMIT ?
''', (keyword, points)).fetchall()
        if len(rows) < 3:
            return {'prediction': 'insufficient_data'}

        # The query returns newest first; the regression needs oldest first.
        daily = [row[1] for row in rows][::-1]
        size = len(daily)
        x_mean = (size - 1) / 2
        y_mean = sum(daily) / size

        numerator = sum((idx - x_mean) * (vol - y_mean) for idx, vol in enumerate(daily))
        denominator = sum((idx - x_mean) ** 2 for idx in range(size))
        slope = numerator / denominator
        rate = slope / y_mean if y_mean > 0 else 0

        label = 'declining'
        for floor, name in ((0.15, 'accelerating'), (0.05, 'growing'), (-0.05, 'stable')):
            if rate > floor:
                label = name
                break
        return {'keyword': keyword, 'prediction': label, 'growth_rate': round(rate, 4)}
Social platforms are hard to scrape. ScraperAPI handles anti-bot protections. ThorData provides residential rotation. Track rates with ScrapeOps.
Follow for more Python data science tutorials.
Top comments (0)