DEV Community

agenthustler
agenthustler

Posted on

How to Build an Automated Product Hunt Launch Analyzer with Python

Product Hunt launches can make or break a startup's early traction. But most founders launch blind, hoping for the best. What if you could analyze every successful launch, extract the patterns, and optimize your strategy with data?

Let's build an automated Product Hunt launch analyzer.

What We're Building

  • Scrape Product Hunt launch pages for upvotes, comments, and maker activity
  • Analyze launch timing, taglines, and category patterns
  • Score launch strategies against historical top performers
  • Generate actionable recommendations

Setting Up

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

Scraping Product Hunt Launches

import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

SCRAPER_API_KEY = "YOUR_KEY"

def _parse_count(text: str) -> int:
    """Parse a vote/comment count like '512' or '1,024'; return 0 on failure."""
    try:
        return int(text.replace(",", ""))
    except ValueError:
        return 0

def scrape_daily_launches(date: str) -> list[dict]:
    """Scrape one day's Product Hunt daily leaderboard via ScraperAPI.

    Args:
        date: Day to scrape, formatted "YYYY-MM-DD".

    Returns:
        One dict per launch with keys: name, tagline, upvotes, comments, date.
        Missing elements default to "" / 0.

    Raises:
        requests.HTTPError: if ScraperAPI responds with an error status.
    """
    params = {
        "api_key": SCRAPER_API_KEY,
        "url": f"https://www.producthunt.com/leaderboard/daily/{date}/all",
        # Product Hunt renders client-side; render=true makes the proxy
        # execute JavaScript before returning HTML.
        "render": "true"
    }

    resp = requests.get("https://api.scraperapi.com", params=params, timeout=60)
    # Fail loudly on proxy/auth/quota errors instead of silently parsing
    # an error page into zero launches.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    launches = []
    for item in soup.select("[data-test='post-item']"):
        name_el = item.select_one("[data-test='post-name']")
        tagline_el = item.select_one("[data-test='post-tagline']")
        votes_el = item.select_one("[data-test='vote-button'] span")
        comments_el = item.select_one("[data-test='comment-button'] span")

        launches.append({
            "name": name_el.get_text(strip=True) if name_el else "",
            "tagline": tagline_el.get_text(strip=True) if tagline_el else "",
            # Counts can render with thousands separators ("1,024"), which
            # would make a bare int() raise ValueError.
            "upvotes": _parse_count(votes_el.get_text(strip=True)) if votes_el else 0,
            "comments": _parse_count(comments_el.get_text(strip=True)) if comments_el else 0,
            "date": date,
        })

    return launches

def scrape_date_range(start_date: str, days: int = 30) -> list[dict]:
    """Scrape launches for `days` consecutive days beginning at `start_date`.

    Args:
        start_date: First day to scrape, formatted "YYYY-MM-DD".
        days: How many consecutive days to cover (default 30).

    Returns:
        A flat list of launch dicts across the whole date range.
    """
    start = datetime.strptime(start_date, "%Y-%m-%d")
    collected: list[dict] = []

    for offset in range(days):
        day = (start + timedelta(days=offset)).strftime("%Y-%m-%d")
        print(f"Scraping {day}...")
        collected.extend(scrape_daily_launches(day))
        # Polite pacing between proxy requests.
        time.sleep(3)

    return collected
Enter fullscreen mode Exit fullscreen mode

The Analysis Engine

def analyze_launches(launches: list[dict]) -> dict:
    """Compute aggregate upvote statistics for a set of launches.

    Args:
        launches: Launch dicts with at least "upvotes" and "date" keys.

    Returns:
        Dict with total count, mean/median upvotes, the 90th-percentile
        upvote threshold, and "best_day" — the weekday with the highest
        mean upvotes (None when there is no data).
    """
    # Guard: an empty list yields a column-less DataFrame, so df["upvotes"]
    # would raise KeyError. Return explicit zeros instead.
    if not launches:
        return {
            "total_launches": 0,
            "avg_upvotes": 0.0,
            "median_upvotes": 0.0,
            "top_10_pct_threshold": 0.0,
            "best_day": None,
        }

    df = pd.DataFrame(launches)

    analysis = {
        "total_launches": len(df),
        "avg_upvotes": df["upvotes"].mean(),
        "median_upvotes": df["upvotes"].median(),
        # 90th percentile: the bar a launch must clear to be in the top 10%.
        "top_10_pct_threshold": df["upvotes"].quantile(0.9),
    }

    # Best weekday judged by mean upvotes across the scraped window.
    df["day_of_week"] = pd.to_datetime(df["date"]).dt.day_name()
    day_stats = df.groupby("day_of_week")["upvotes"].agg(["mean", "median", "count"])
    analysis["best_day"] = day_stats["mean"].idxmax()

    return analysis

def analyze_tagline_patterns(launches: list[dict]) -> dict:
    """Summarize tagline traits among the top-performing launches.

    Args:
        launches: Launch dicts with "tagline" and "upvotes" keys.

    Returns:
        Dict mapping each pattern name to a formatted "hits/total (pct%)"
        string, computed over roughly the top 10% of launches by upvotes.
    """
    patterns = {
        "has_emoji": 0,
        "mentions_ai": 0,
        "under_10_words": 0,
        "has_comparison": 0,
    }

    # Guard: keep the return type consistent (formatted strings) even with
    # no data; the original returned raw ints in this case.
    if not launches:
        return {k: "0/0 (0%)" for k in patterns}

    df = pd.DataFrame(launches)

    # Top decile, but never fewer than one row: int(len * 0.1) is 0 for
    # fewer than 10 launches, which made the cohort silently empty.
    top_n = max(1, int(len(df) * 0.1))
    top_launches = df.nlargest(top_n, "upvotes")

    for tagline in top_launches["tagline"]:
        lowered = tagline.lower()
        words = tagline.split()

        # NOTE: any non-ASCII codepoint counts here, so accented characters
        # trigger this as well as actual emoji.
        if any(ord(c) > 127 for c in tagline):
            patterns["has_emoji"] += 1
        # NOTE: substring match, so words like "maintain" also count as "ai".
        if "ai" in lowered or "gpt" in lowered:
            patterns["mentions_ai"] += 1
        if len(words) < 10:
            patterns["under_10_words"] += 1
        if any(w in lowered for w in ["like", "but", "meets", "for"]):
            patterns["has_comparison"] += 1

    total = len(top_launches)
    return {k: f"{v}/{total} ({v/total*100:.0f}%)" for k, v in patterns.items()}
Enter fullscreen mode Exit fullscreen mode

Launch Score Calculator

def score_launch_strategy(
    tagline: str,
    has_video: bool,
    num_screenshots: int,
    launch_day: str,
    topics: list[str],
    historical_data: pd.DataFrame
) -> dict:
    """Score a planned launch (0-100) against historical performance data.

    Args:
        tagline: Proposed launch tagline.
        has_video: Whether the launch page includes a video demo.
        num_screenshots: Number of gallery screenshots planned.
        launch_day: Planned weekday name, e.g. "Tuesday".
        topics: Product Hunt topics/categories to tag the launch with.
        historical_data: DataFrame with "date" and "upvotes" columns from
            scraped launches; may be empty.

    Returns:
        Dict with "score", "max_score" (100), a letter "grade", and a
        "feedback" list of human-readable recommendations.
    """
    score = 0
    feedback = []

    # Tagline length: 5-12 words earns points; otherwise advice only.
    words = tagline.split()
    if 5 <= len(words) <= 12:
        score += 15
        feedback.append("Good tagline length (5-12 words)")
    else:
        feedback.append(f"Tagline is {len(words)} words. Aim for 5-12.")

    # Media assets: video carries the heaviest weight.
    if has_video:
        score += 20
        feedback.append("Video demo included (strong signal)")
    else:
        score += 5
        feedback.append("Consider adding a video demo (+15 potential)")

    if num_screenshots >= 3:
        score += 15
        feedback.append(f"{num_screenshots} screenshots (good)")
    else:
        feedback.append(f"Only {num_screenshots} screenshots. Add more visuals.")

    # Launch-day timing, judged against historical mean upvotes per weekday.
    # Guard: idxmax() on an empty grouped series raises ValueError, so award
    # the partial credit and flag the missing data instead of crashing.
    if historical_data.empty:
        score += 10
        feedback.append("No historical data available to evaluate launch day")
    else:
        day_perf = historical_data.groupby(
            pd.to_datetime(historical_data["date"]).dt.day_name()
        )["upvotes"].mean()

        best_day = day_perf.idxmax()
        if launch_day == best_day:
            score += 20
            feedback.append(f"Launching on {launch_day} (best performing day)")
        else:
            feedback.append(f"Consider launching on {best_day} instead of {launch_day}")
            score += 10

    if topics:
        score += 10
        feedback.append(f"Topics: {', '.join(topics)}")

    # Engagement plan: flat credit plus a reminder (not verifiable up front).
    score += 20
    feedback.append("Plan to respond to every comment within 1 hour")

    return {
        "score": score, "max_score": 100,
        "grade": "A" if score >= 80 else "B" if score >= 60 else "C" if score >= 40 else "D",
        "feedback": feedback
    }
Enter fullscreen mode Exit fullscreen mode

Running the Full Analysis

def full_analysis():
    """Run the whole pipeline: scrape a month of launches, report aggregate
    stats and tagline patterns, then score a sample launch strategy."""
    print("Scraping recent Product Hunt launches...")
    history = scrape_date_range("2026-02-01", days=30)

    print(f"Analyzing {len(history)} launches...")
    overview = analyze_launches(history)

    print("\n=== Product Hunt Launch Analysis ===")
    for label, key in [
        ("Average upvotes", "avg_upvotes"),
        ("Median upvotes", "median_upvotes"),
        ("Top 10% threshold", "top_10_pct_threshold"),
    ]:
        print(f"{label}: {overview[key]:.0f}")
    print(f"Best day to launch: {overview['best_day']}")

    patterns = analyze_tagline_patterns(history)
    print("\n=== Top Performer Tagline Patterns ===")
    for name, summary in patterns.items():
        print(f"  {name}: {summary}")

    frame = pd.DataFrame(history)
    verdict = score_launch_strategy(
        tagline="Build internal tools 10x faster with AI",
        has_video=True,
        num_screenshots=5,
        launch_day="Tuesday",
        topics=["Developer Tools", "Artificial Intelligence"],
        historical_data=frame,
    )

    print(f"\n=== Your Launch Score: {verdict['score']}/{verdict['max_score']} ({verdict['grade']}) ===")
    for note in verdict["feedback"]:
        print(f"  - {note}")

full_analysis()
Enter fullscreen mode Exit fullscreen mode

Infrastructure

For reliable Product Hunt scraping at scale, ScraperAPI handles the JavaScript rendering these pages require. Use ThorData residential proxies for high-volume historical analysis. Track your scraper health with ScrapeOps.

What to Build Next

  • Competitor launch tracking with alerts
  • Historical trend analysis by category
  • Automated launch day recommendations
  • Comment sentiment analysis for market research

The founders who treat Product Hunt as a data problem consistently outperform those who wing it. Now you have the tools to join them.

Top comments (0)