How to Build a Trend Forecasting Tool with Social Scraping

#python #webdev #programming #tutorial

How to Build a Trend Forecasting Tool with Social Scraping

Trends emerge on social media before they hit the mainstream. By scraping platforms systematically, you can detect rising trends days before they become obvious.

Data Collection

# Implementation is proprietary (that IS the moat).
# Skip the build — use our ready-made Apify actor:
# see the CTA below for the link (fpr=yw6md3).

Velocity Engine

The key insight: acceleration — how quickly activity is growing from one time window to the next — matters more than raw volume.

class VelocityEngine:
    """Compute window-over-window growth of a keyword's mentions and engagement.

    Each metric compares the most recent ``hours`` window with the window of
    the same length immediately before it, reading from a ``mentions`` table
    that has ``keyword``, ``timestamp`` and ``engagement`` columns.
    """

    def __init__(self, db):
        """Store the database handle.

        Args:
            db: Object whose ``execute(sql, params)`` returns a cursor with
                ``fetchone()`` (for example a ``sqlite3.Connection``).
        """
        self.db = db

    def velocity(self, keyword, hours=24):
        """Return the relative change in mention count between the two windows.

        Args:
            keyword: Keyword to look up in the ``mentions`` table.
            hours: Length of each comparison window, in hours.

        Returns:
            ``(current - previous) / previous`` rounded to 4 decimals;
            ``float('inf')`` when the previous window is empty but the current
            one is not; ``0.0`` when both are empty.
        """
        return self._growth(self._count, keyword, hours)

    def _count(self, kw, start, end):
        """Count mentions of ``kw`` with ``start <= timestamp < end``."""
        # Half-open range: a row stamped exactly on the boundary between two
        # adjacent windows belongs to the later window only. BETWEEN is
        # inclusive on both ends and would count such a row twice.
        c = self.db.execute(
            'SELECT COUNT(*) FROM mentions '
            'WHERE keyword=? AND timestamp >= ? AND timestamp < ?',
            (kw, start.isoformat(), end.isoformat()))
        return c.fetchone()[0]

    def engagement_velocity(self, keyword, hours=24):
        """Return the relative change in summed engagement between the two windows.

        Args:
            keyword: Keyword to look up in the ``mentions`` table.
            hours: Length of each comparison window, in hours.

        Returns:
            Same convention as :meth:`velocity`, computed on
            ``SUM(engagement)`` instead of the row count.
        """
        return self._growth(self._engagement, keyword, hours)

    def _engagement(self, kw, start, end):
        """Sum ``engagement`` for ``kw`` with ``start <= timestamp < end``.

        COALESCE makes the result 0 rather than NULL when no rows match.
        """
        c = self.db.execute(
            'SELECT COALESCE(SUM(engagement),0) FROM mentions '
            'WHERE keyword=? AND timestamp >= ? AND timestamp < ?',
            (kw, start.isoformat(), end.isoformat()))
        return c.fetchone()[0]

    def _growth(self, measure, keyword, hours):
        """Apply ``measure`` to the current and previous windows and compare them."""
        # Bounds are sent as naive local-time ISO-8601 strings.
        # NOTE(review): assumes stored timestamps use the same format and
        # timezone so that string comparison orders them correctly - confirm.
        now = datetime.now()
        cur_start = now - timedelta(hours=hours)
        prev_start = cur_start - timedelta(hours=hours)
        cur = measure(keyword, cur_start, now)
        prev = measure(keyword, prev_start, cur_start)
        if prev == 0:
            # No baseline to divide by: any activity at all is unbounded growth.
            return float('inf') if cur > 0 else 0.0
        return round((cur - prev) / prev, 4)

Breakout Detection

class BreakoutDetector:
    """Pick out keywords whose blended velocity score clears a threshold."""

    def __init__(self, engine):
        """Keep the engine that supplies ``velocity`` and ``engagement_velocity``."""
        self.engine = engine

    def detect(self, keywords, threshold=0.5):
        """Score every keyword and return the breakouts, highest score first.

        The score weights mention velocity at 0.4 and engagement velocity
        at 0.6. Only keywords scoring strictly above ``threshold`` are kept.

        Args:
            keywords: Iterable of keywords to evaluate.
            threshold: Minimum (exclusive) score for a keyword to be reported.

        Returns:
            List of dicts with keys ``keyword``, ``mention_vel``,
            ``engagement_vel`` and ``score`` (rounded to 3 decimals), sorted
            by ``score`` in descending order.
        """
        def evaluate(term):
            # Query mentions first, then engagement, then blend the two.
            mention_rate = self.engine.velocity(term)
            engagement_rate = self.engine.engagement_velocity(term)
            blended = (mention_rate * 0.4) + (engagement_rate * 0.6)
            return mention_rate, engagement_rate, blended

        scored = ((term, evaluate(term)) for term in keywords)
        breakouts = [
            {'keyword': term, 'mention_vel': mention_rate,
             'engagement_vel': engagement_rate, 'score': round(blended, 3)}
            for term, (mention_rate, engagement_rate, blended) in scored
            if blended > threshold
        ]
        breakouts.sort(key=lambda hit: hit['score'], reverse=True)
        return breakouts

Trend Forecasting

class Forecaster:
    """Classify a keyword's recent daily mention trend with a linear fit."""

    def predict(self, keyword, db, points=7):
        """Label the trend of the most recent daily mention counts.

        Fits a least-squares line through the daily counts (oldest to newest),
        divides the slope by the mean daily count, and buckets that rate.

        Args:
            keyword: Keyword to look up in the ``mentions`` table.
            db: Object whose ``execute(sql, params)`` returns a cursor with
                ``fetchall()`` (for example a ``sqlite3.Connection``).
            points: Maximum number of most-recent days (that have at least
                one mention) to include in the fit.

        Returns:
            ``{'keyword', 'prediction', 'growth_rate'}`` where ``prediction``
            is ``'accelerating'`` (rate > 0.15), ``'growing'`` (> 0.05),
            ``'stable'`` (> -0.05) or ``'declining'``. When fewer than 3 days
            of data exist, returns ``{'keyword', 'prediction':
            'insufficient_data'}`` with no ``growth_rate``.
        """
        cursor = db.execute('''
            SELECT DATE(timestamp), COUNT(*) FROM mentions
            WHERE keyword=? GROUP BY DATE(timestamp)
            ORDER BY DATE(timestamp) DESC LIMIT ?
        ''', (keyword, points))
        data = cursor.fetchall()
        if len(data) < 3:
            # Include the keyword so results collected for many keywords can
            # still be attributed when there is not enough data.
            return {'keyword': keyword, 'prediction': 'insufficient_data'}

        # Rows arrive newest-first; the fit needs them oldest-first.
        # NOTE: days without any mention produce no row, so the fit treats
        # the returned days as evenly spaced even when there are gaps.
        vols = [r[1] for r in reversed(data)]
        n = len(vols)
        xm = (n - 1) / 2
        ym = sum(vols) / n
        # Ordinary least-squares slope with x = 0..n-1; the denominator is
        # positive because n >= 3.
        slope = sum((i - xm) * (v - ym) for i, v in enumerate(vols))
        slope /= sum((i - xm) ** 2 for i in range(n))
        # Normalise by the mean so the thresholds are volume-independent.
        rate = slope / ym if ym > 0 else 0
        if rate > 0.15:
            p = 'accelerating'
        elif rate > 0.05:
            p = 'growing'
        elif rate > -0.05:
            p = 'stable'
        else:
            p = 'declining'
        return {'keyword': keyword, 'prediction': p, 'growth_rate': round(rate, 4)}