DEV Community

agenthustler
agenthustler

Posted on

Scraping Product Hunt: Finding New Tools Before They Go Viral

Product Hunt is the ultimate launchpad for tech products. Scraping it lets you identify trending tools early, analyze launch patterns, and build competitive intelligence.

Using Product Hunt's GraphQL API

import requests
from datetime import datetime

class PHScraper:
    """Thin client for the Product Hunt GraphQL v2 API.

    Fetches the posts launched on a given day, flattened into plain dicts
    and sorted by vote count (highest first).
    """

    def __init__(self, token):
        # `token` is a Product Hunt developer token, sent as a Bearer header.
        self.url = 'https://api.producthunt.com/v2/api/graphql'
        self.headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}

    def get_posts(self, date=None):
        """Return posts launched on `date` ('YYYY-MM-DD'; defaults to today).

        Each dict has: id, name, tagline, votes, website, url, topics.
        Raises requests.HTTPError on HTTP failures and RuntimeError when the
        API returns GraphQL-level errors.
        """
        if not date:
            date = datetime.now().strftime('%Y-%m-%d')
        # Pass the window as GraphQL variables rather than splicing text into
        # the query with str.replace — no injection/escaping hazards, and the
        # query string stays a constant.
        query = '''query($after: DateTime!, $before: DateTime!) {
            posts(postedAfter: $after, postedBefore: $before, first: 50) {
                edges { node { id name tagline votesCount commentsCount website url
                    topics { edges { node { name } } }
                    makers { name username }
                } }
            }
        }'''
        variables = {'after': f'{date}T00:00:00Z', 'before': f'{date}T23:59:59Z'}

        resp = requests.post(self.url, headers=self.headers,
                             json={'query': query, 'variables': variables},
                             timeout=30)
        resp.raise_for_status()  # fail loudly on 4xx/5xx instead of KeyError below
        payload = resp.json()
        # GraphQL reports failures in-band with HTTP 200; surface them explicitly.
        if 'errors' in payload:
            raise RuntimeError(f"Product Hunt API error: {payload['errors']}")

        posts = []
        for edge in payload['data']['posts']['edges']:
            n = edge['node']
            posts.append({
                'id': n['id'],            # stable PH id — lets trackers dedupe reliably
                'name': n['name'], 'tagline': n['tagline'],
                'votes': n['votesCount'], 'website': n['website'],
                'url': n['url'],          # canonical PH launch page
                'topics': [t['node']['name'] for t in n['topics']['edges']]
            })
        return sorted(posts, key=lambda x: x['votes'], reverse=True)
Enter fullscreen mode Exit fullscreen mode

Tracking Products Over Time

import sqlite3, json

class PHTracker:
    """Persists Product Hunt posts and periodic vote snapshots in SQLite."""

    def __init__(self, db='ph.db'):
        self.conn = sqlite3.connect(db)
        self.conn.executescript('''
            CREATE TABLE IF NOT EXISTS products (id TEXT PRIMARY KEY, name TEXT, website TEXT, topics TEXT, first_seen DATE);
            CREATE TABLE IF NOT EXISTS votes (product_id TEXT, votes INTEGER, checked_at TIMESTAMP);
        ''')

    def track(self, product):
        """Record `product` (insert-once) and append a vote snapshot.

        `product` is a dict with 'name', 'website', 'topics' (list) and
        'votes'; a stable 'id' is used when present, falling back to the
        name for callers that don't supply one.
        """
        # BUG FIX: the original wrote product['name'] into the id PRIMARY KEY,
        # so two different launches with the same name silently collided
        # (INSERT OR IGNORE drops the second). Prefer the API's stable id.
        pid = product.get('id', product['name'])
        # Store UTC with a space separator so values compare correctly against
        # SQLite's datetime('now', ...) strings, which are UTC and use ' ',
        # not isoformat's 'T'.
        now_utc = datetime.utcnow()
        self.conn.execute('INSERT OR IGNORE INTO products VALUES (?,?,?,?,?)',
            (pid, product['name'], product['website'],
             json.dumps(product['topics']), now_utc.date().isoformat()))
        self.conn.execute('INSERT INTO votes VALUES (?,?,?)',
            (pid, product['votes'], now_utc.strftime('%Y-%m-%d %H:%M:%S')))
        self.conn.commit()

    def fastest_growing(self, hours=24):
        """Top 20 products by vote growth (max - min) in the trailing window."""
        import pandas as pd
        return pd.read_sql_query('''
            SELECT p.name, MAX(v.votes)-MIN(v.votes) as growth
            FROM votes v JOIN products p ON p.id=v.product_id
            WHERE v.checked_at >= datetime('now', ?) GROUP BY v.product_id
            ORDER BY growth DESC LIMIT 20''', self.conn, params=(f'-{hours} hours',))
Enter fullscreen mode Exit fullscreen mode

Launch Pattern Analysis

from collections import Counter

class LaunchAnalyzer:
    """Read-only analytics over the products table written by the tracker."""

    def __init__(self, db='ph.db'):
        # Connect to the same SQLite file the tracker populates.
        self.conn = sqlite3.connect(db)

    def top_topics(self, days=30):
        """Return the 20 most common topics among products first seen in the
        last `days` days, as (topic, count) pairs, most frequent first."""
        import pandas as pd
        recent = pd.read_sql_query(
            'SELECT topics FROM products WHERE first_seen >= date("now", ?)',
            self.conn, params=(f'-{days} days',))
        # topics is stored as a JSON-encoded list; tally every entry.
        tally = Counter()
        for serialized in recent['topics']:
            tally.update(json.loads(serialized))
        return tally.most_common(20)
Enter fullscreen mode Exit fullscreen mode

Early Detection

def check_breakouts(scraper, tracker, threshold=100):
    """Fetch today's posts, record all of them via `tracker`, and return the
    ones at or above `threshold` votes (also printed as BREAKOUT alerts)."""
    todays_posts = scraper.get_posts()

    # Collect breakouts first so the returned list reflects the fetched
    # snapshot regardless of what tracking does.
    hot = []
    for post in todays_posts:
        if post['votes'] >= threshold:
            hot.append(post)

    # Every post gets a vote snapshot, breakout or not.
    for post in todays_posts:
        tracker.track(post)

    for post in hot:
        print(f"BREAKOUT: {post['name']} - {post['votes']} votes - {post['website']}")
    return hot
Enter fullscreen mode Exit fullscreen mode

Scaling

ScraperAPI handles rate limiting. ThorData provides reliable proxies, and ScrapeOps keeps pipelines running.

Conclusion

Track launches, analyze patterns, and set up alerts to discover the next big product before it goes mainstream.

Top comments (0)