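Product Hunt is one of the most popular launchpads for tech products. Collecting its launch data lets you identify trending tools early, analyze launch patterns, and build competitive intelligence. The examples below use Product Hunt's official GraphQL API, which requires a developer token.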
Using Product Hunt's GraphQL API
from datetime import datetime, timezone

import requests
class PHScraper:
    """Minimal client for the Product Hunt v2 GraphQL API.

    Args:
        token: Developer token sent as a ``Bearer`` Authorization header.
    """

    def __init__(self, token):
        self.url = 'https://api.producthunt.com/v2/api/graphql'
        self.headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}

    def get_posts(self, date=None):
        """Return up to 50 posts for one UTC calendar day, most-voted first.

        Args:
            date: Day to query, either a ``'YYYY-MM-DD'`` string or any object
                with ``strftime`` (``date``/``datetime``). Defaults to today
                in UTC, because the query window is expressed in UTC (``Z``).

        Returns:
            A list of dicts with keys ``id``, ``name``, ``tagline``, ``votes``,
            ``comments``, ``website``, ``url`` and ``topics`` (list of topic
            names), sorted by ``votes`` descending.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
            RuntimeError: If the GraphQL response carries an ``errors`` list.
        """
        if not date:
            # The window below is in UTC, so "today" must be the UTC date too;
            # the local date would query the wrong day around midnight.
            date = datetime.now(timezone.utc).strftime('%Y-%m-%d')
        elif hasattr(date, 'strftime'):
            date = date.strftime('%Y-%m-%d')
        query = '''query { posts(postedAfter: "START", postedBefore: "END", first: 50) {
            edges { node { id name tagline votesCount commentsCount website url
                topics { edges { node { name } } }
                makers { name username }
            } }
        } }'''.replace('START', f'{date}T00:00:00Z').replace('END', f'{date}T23:59:59Z')
        # Without a timeout a stalled connection would block the caller forever.
        resp = requests.post(self.url, headers=self.headers,
                             json={'query': query}, timeout=30)
        resp.raise_for_status()
        return self._parse_posts(resp.json())

    @staticmethod
    def _parse_posts(payload):
        """Convert a decoded GraphQL response into the sorted list of post dicts."""
        errors = payload.get('errors')
        if errors:
            # GraphQL reports failures (bad token, rate limit, ...) in the
            # body, typically alongside an HTTP 200, so check explicitly.
            messages = '; '.join(
                str(err.get('message', err)) if isinstance(err, dict) else str(err)
                for err in errors
            )
            raise RuntimeError(f'Product Hunt API error: {messages}')
        # Any level may be null/missing; treat that as "no posts".
        edges = ((payload.get('data') or {}).get('posts') or {}).get('edges') or []
        posts = []
        for edge in edges:
            node = edge['node']
            topic_edges = (node.get('topics') or {}).get('edges') or []
            posts.append({
                'id': node.get('id'),
                'name': node['name'],
                'tagline': node['tagline'],
                'votes': node['votesCount'],
                'comments': node.get('commentsCount'),
                'website': node['website'],
                'url': node.get('url'),
                'topics': [t['node']['name'] for t in topic_edges],
            })
        return sorted(posts, key=lambda post: post['votes'], reverse=True)
Tracking Products Over Time
import sqlite3, json
class PHTracker:
    """Persist products and periodic vote snapshots in a SQLite database.

    Args:
        db: Path of the SQLite file (``':memory:'`` works for experiments).
            The ``products`` and ``votes`` tables are created if missing.
    """

    def __init__(self, db='ph.db'):
        self.conn = sqlite3.connect(db)
        self.conn.executescript('''
            CREATE TABLE IF NOT EXISTS products (id TEXT PRIMARY KEY, name TEXT, website TEXT, topics TEXT, first_seen DATE);
            CREATE TABLE IF NOT EXISTS votes (product_id TEXT, votes INTEGER, checked_at TIMESTAMP);
        ''')

    def track(self, product):
        """Record a product (first sighting only) plus one vote snapshot.

        Args:
            product: Dict with ``name``, ``website``, ``topics`` and ``votes``.
                An ``id`` key is used as the primary key when present;
                otherwise the name is used, as before.
        """
        # Names are not unique, so prefer the real id when the caller has one.
        product_id = product.get('id') or product['name']
        # Store UTC in SQLite's own 'YYYY-MM-DD HH:MM:SS' layout so the text
        # compares correctly against datetime('now', ...) in queries.
        now = datetime.now(timezone.utc)
        # One transaction: either both rows are written or neither is.
        with self.conn:
            self.conn.execute(
                'INSERT OR IGNORE INTO products VALUES (?,?,?,?,?)',
                (product_id, product['name'], product['website'],
                 json.dumps(product['topics']), now.date().isoformat()))
            self.conn.execute(
                'INSERT INTO votes VALUES (?,?,?)',
                (product_id, product['votes'], now.strftime('%Y-%m-%d %H:%M:%S')))

    def fastest_growing(self, hours=24):
        """Return the 20 products with the largest vote gain in the window.

        Args:
            hours: Size of the look-back window, counted back from now.

        Returns:
            A pandas DataFrame with columns ``name`` and ``growth`` (highest
            minus lowest snapshot in the window), ordered by growth descending.
        """
        import pandas as pd
        # datetime(v.checked_at) normalises older ISO 'T'-separated rows; a raw
        # text comparison would rank 'T' after ' ' and let every same-day row
        # through regardless of the requested window.
        return pd.read_sql_query('''
            SELECT p.name, MAX(v.votes)-MIN(v.votes) as growth
            FROM votes v JOIN products p ON p.id=v.product_id
            WHERE datetime(v.checked_at) >= datetime('now', ?) GROUP BY v.product_id
            ORDER BY growth DESC LIMIT 20''', self.conn, params=(f'-{hours} hours',))

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()
Launch Pattern Analysis
from collections import Counter
class LaunchAnalyzer:
    """Read-only analysis over the ``products`` table of the tracking database.

    Args:
        db: Path of the SQLite file holding the ``products`` table.
    """

    def __init__(self, db='ph.db'):
        self.conn = sqlite3.connect(db)

    def top_topics(self, days=30):
        """Return the 20 most frequent topics among recently seen products.

        Args:
            days: Only products whose ``first_seen`` falls within this many
                days before today are counted.

        Returns:
            A list of ``(topic, count)`` tuples, most common first.
        """
        # 'now' must be single-quoted: double quotes denote an identifier in
        # SQL and only work here through SQLite's legacy string fallback.
        rows = self.conn.execute(
            "SELECT topics FROM products WHERE first_seen >= date('now', ?)",
            (f'-{days} days',))
        counts = Counter()
        for (raw_topics,) in rows:
            # Skip rows with NULL/empty topics instead of crashing in json.loads.
            if raw_topics:
                counts.update(json.loads(raw_topics))
        return counts.most_common(20)
Early Detection
def check_breakouts(scraper, tracker, threshold=100):
    """Fetch today's posts, record all of them, and report the breakouts.

    Args:
        scraper: Object whose ``get_posts()`` returns a list of post dicts
            with ``name``, ``votes`` and ``website`` keys.
        tracker: Object whose ``track(post)`` stores one post.
        threshold: Minimum vote count for a post to count as a breakout.

    Returns:
        The posts whose votes are at or above ``threshold``, in the order
        the scraper returned them.
    """
    todays_posts = scraper.get_posts()

    # Select the breakouts before touching the tracker.
    hot = []
    for post in todays_posts:
        if post['votes'] >= threshold:
            hot.append(post)

    # Every post is tracked, not just the breakouts.
    for post in todays_posts:
        tracker.track(post)

    for p in hot:
        print(f"BREAKOUT: {p['name']} - {p['votes']} votes - {p['website']}")
    return hot
Scaling
For larger workloads, ScraperAPI handles rate limiting, ThorData provides reliable proxies, and ScrapeOps keeps pipelines running.
Conclusion
Track launches, analyze patterns, and set up alerts to discover the next big product before it goes mainstream.
Top comments (0)