DEV Community

agenthustler
agenthustler

Posted on

How to Build an Automated Product Hunt Launch Analyzer with Python

Product Hunt launches can make or break a startup's early traction. But most founders launch blind, hoping for the best. What if you could analyze every successful launch, extract the patterns, and optimize your strategy with data?

Let's build an automated Product Hunt launch analyzer.

What We're Building

  • Scrape Product Hunt launch pages for upvotes, comments, and maker activity
  • Analyze launch timing, taglines, and category patterns
  • Score launch strategies against historical top performers
  • Generate actionable recommendations

Setting Up

pip install requests beautifulsoup4 pandas
Enter fullscreen mode Exit fullscreen mode

Scraping Product Hunt Launches

import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

SCRAPER_API_KEY = "YOUR_KEY"

def _parse_count(text: str) -> int:
    """Parse a vote/comment count like '512' or '1,024'; return 0 on failure."""
    try:
        return int(text.replace(",", ""))
    except ValueError:
        return 0

def scrape_daily_launches(date: str) -> list[dict]:
    """Scrape one day's Product Hunt daily leaderboard via ScraperAPI.

    Args:
        date: Day to scrape, formatted "YYYY-MM-DD".

    Returns:
        One dict per launch with keys: name, tagline, upvotes, comments, date.
        Missing elements default to "" / 0.

    Raises:
        requests.HTTPError: if ScraperAPI responds with an error status.
    """
    params = {
        "api_key": SCRAPER_API_KEY,
        "url": f"https://www.producthunt.com/leaderboard/daily/{date}/all",
        # Product Hunt renders client-side; render=true makes the proxy
        # execute JavaScript before returning HTML.
        "render": "true"
    }

    resp = requests.get("https://api.scraperapi.com", params=params, timeout=60)
    # Fail loudly on proxy/auth/quota errors instead of silently parsing
    # an error page into zero launches.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    launches = []
    for item in soup.select("[data-test='post-item']"):
        name_el = item.select_one("[data-test='post-name']")
        tagline_el = item.select_one("[data-test='post-tagline']")
        votes_el = item.select_one("[data-test='vote-button'] span")
        comments_el = item.select_one("[data-test='comment-button'] span")

        launches.append({
            "name": name_el.get_text(strip=True) if name_el else "",
            "tagline": tagline_el.get_text(strip=True) if tagline_el else "",
            # Counts can render with thousands separators ("1,024"), which
            # would make a bare int() raise ValueError.
            "upvotes": _parse_count(votes_el.get_text(strip=True)) if votes_el else 0,
            "comments": _parse_count(comments_el.get_text(strip=True)) if comments_el else 0,
            "date": date,
        })

    return launches

def scrape_date_range(start_date: str, days: int = 30) -> list[dict]:
    """Scrape launches for `days` consecutive days beginning at `start_date`.

    Args:
        start_date: First day to scrape, formatted "YYYY-MM-DD".
        days: How many consecutive days to cover (default 30).

    Returns:
        A flat list of launch dicts across the whole date range.
    """
    start = datetime.strptime(start_date, "%Y-%m-%d")
    collected: list[dict] = []

    for offset in range(days):
        day = (start + timedelta(days=offset)).strftime("%Y-%m-%d")
        print(f"Scraping {day}...")
        collected.extend(scrape_daily_launches(day))
        # Polite pacing between proxy requests.
        time.sleep(3)

    return collected
Enter fullscreen mode Exit fullscreen mode

The Analysis Engine

def analyze_launches(launches: list[dict]) -> dict:
    """Compute aggregate upvote statistics for a set of launches.

    Args:
        launches: Launch dicts with at least "upvotes" and "date" keys.

    Returns:
        Dict with total count, mean/median upvotes, the 90th-percentile
        upvote threshold, and "best_day" — the weekday with the highest
        mean upvotes (None when there is no data).
    """
    # Guard: an empty list yields a column-less DataFrame, so df["upvotes"]
    # would raise KeyError. Return explicit zeros instead.
    if not launches:
        return {
            "total_launches": 0,
            "avg_upvotes": 0.0,
            "median_upvotes": 0.0,
            "top_10_pct_threshold": 0.0,
            "best_day": None,
        }

    df = pd.DataFrame(launches)

    analysis = {
        "total_launches": len(df),
        "avg_upvotes": df["upvotes"].mean(),
        "median_upvotes": df["upvotes"].median(),
        # 90th percentile: the bar a launch must clear to be in the top 10%.
        "top_10_pct_threshold": df["upvotes"].quantile(0.9),
    }

    # Best weekday judged by mean upvotes across the scraped window.
    df["day_of_week"] = pd.to_datetime(df["date"]).dt.day_name()
    day_stats = df.groupby("day_of_week")["upvotes"].agg(["mean", "median", "count"])
    analysis["best_day"] = day_stats["mean"].idxmax()

    return analysis

def analyze_tagline_patterns(launches: list[dict]) -> dict:
    """Summarize tagline traits among the top-performing launches.

    Args:
        launches: Launch dicts with "tagline" and "upvotes" keys.

    Returns:
        Dict mapping each pattern name to a formatted "hits/total (pct%)"
        string, computed over roughly the top 10% of launches by upvotes.
    """
    patterns = {
        "has_emoji": 0,
        "mentions_ai": 0,
        "under_10_words": 0,
        "has_comparison": 0,
    }

    # Guard: keep the return type consistent (formatted strings) even with
    # no data; the original returned raw ints in this case.
    if not launches:
        return {k: "0/0 (0%)" for k in patterns}

    df = pd.DataFrame(launches)

    # Top decile, but never fewer than one row: int(len * 0.1) is 0 for
    # fewer than 10 launches, which made the cohort silently empty.
    top_n = max(1, int(len(df) * 0.1))
    top_launches = df.nlargest(top_n, "upvotes")

    for tagline in top_launches["tagline"]:
        lowered = tagline.lower()
        words = tagline.split()

        # NOTE: any non-ASCII codepoint counts here, so accented characters
        # trigger this as well as actual emoji.
        if any(ord(c) > 127 for c in tagline):
            patterns["has_emoji"] += 1
        # NOTE: substring match, so words like "maintain" also count as "ai".
        if "ai" in lowered or "gpt" in lowered:
            patterns["mentions_ai"] += 1
        if len(words) < 10:
            patterns["under_10_words"] += 1
        if any(w in lowered for w in ["like", "but", "meets", "for"]):
            patterns["has_comparison"] += 1

    total = len(top_launches)
    return {k: f"{v}/{total} ({v/total*100:.0f}%)" for k, v in patterns.items()}
Enter fullscreen mode Exit fullscreen mode

Launch Score Calculator

def score_launch_strategy(
    tagline: str,
    has_video: bool,
    num_screenshots: int,
    launch_day: str,
    topics: list[str],
    historical_data: pd.DataFrame
) -> dict:
    """Score a planned launch (0-100) against historical performance data.

    Args:
        tagline: Proposed launch tagline.
        has_video: Whether the launch page includes a video demo.
        num_screenshots: Number of gallery screenshots planned.
        launch_day: Planned weekday name, e.g. "Tuesday".
        topics: Product Hunt topics/categories to tag the launch with.
        historical_data: DataFrame with "date" and "upvotes" columns from
            scraped launches; may be empty.

    Returns:
        Dict with "score", "max_score" (100), a letter "grade", and a
        "feedback" list of human-readable recommendations.
    """
    score = 0
    feedback = []

    # Tagline length: 5-12 words earns points; otherwise advice only.
    words = tagline.split()
    if 5 <= len(words) <= 12:
        score += 15
        feedback.append("Good tagline length (5-12 words)")
    else:
        feedback.append(f"Tagline is {len(words)} words. Aim for 5-12.")

    # Media assets: video carries the heaviest weight.
    if has_video:
        score += 20
        feedback.append("Video demo included (strong signal)")
    else:
        score += 5
        feedback.append("Consider adding a video demo (+15 potential)")

    if num_screenshots >= 3:
        score += 15
        feedback.append(f"{num_screenshots} screenshots (good)")
    else:
        feedback.append(f"Only {num_screenshots} screenshots. Add more visuals.")

    # Launch-day timing, judged against historical mean upvotes per weekday.
    # Guard: idxmax() on an empty grouped series raises ValueError, so award
    # the partial credit and flag the missing data instead of crashing.
    if historical_data.empty:
        score += 10
        feedback.append("No historical data available to evaluate launch day")
    else:
        day_perf = historical_data.groupby(
            pd.to_datetime(historical_data["date"]).dt.day_name()
        )["upvotes"].mean()

        best_day = day_perf.idxmax()
        if launch_day == best_day:
            score += 20
            feedback.append(f"Launching on {launch_day} (best performing day)")
        else:
            feedback.append(f"Consider launching on {best_day} instead of {launch_day}")
            score += 10

    if topics:
        score += 10
        feedback.append(f"Topics: {', '.join(topics)}")

    # Engagement plan: flat credit plus a reminder (not verifiable up front).
    score += 20
    feedback.append("Plan to respond to every comment within 1 hour")

    return {
        "score": score, "max_score": 100,
        "grade": "A" if score >= 80 else "B" if score >= 60 else "C" if score >= 40 else "D",
        "feedback": feedback
    }
Enter fullscreen mode Exit fullscreen mode

Running the Full Analysis

def full_analysis():
    """Run the whole pipeline: scrape a month of launches, report aggregate
    stats and tagline patterns, then score a sample launch strategy."""
    print("Scraping recent Product Hunt launches...")
    history = scrape_date_range("2026-02-01", days=30)

    print(f"Analyzing {len(history)} launches...")
    overview = analyze_launches(history)

    print("\n=== Product Hunt Launch Analysis ===")
    for label, key in [
        ("Average upvotes", "avg_upvotes"),
        ("Median upvotes", "median_upvotes"),
        ("Top 10% threshold", "top_10_pct_threshold"),
    ]:
        print(f"{label}: {overview[key]:.0f}")
    print(f"Best day to launch: {overview['best_day']}")

    patterns = analyze_tagline_patterns(history)
    print("\n=== Top Performer Tagline Patterns ===")
    for name, summary in patterns.items():
        print(f"  {name}: {summary}")

    frame = pd.DataFrame(history)
    verdict = score_launch_strategy(
        tagline="Build internal tools 10x faster with AI",
        has_video=True,
        num_screenshots=5,
        launch_day="Tuesday",
        topics=["Developer Tools", "Artificial Intelligence"],
        historical_data=frame,
    )

    print(f"\n=== Your Launch Score: {verdict['score']}/{verdict['max_score']} ({verdict['grade']}) ===")
    for note in verdict["feedback"]:
        print(f"  - {note}")

full_analysis()
Enter fullscreen mode Exit fullscreen mode

Infrastructure

For reliable Product Hunt scraping at scale, ScraperAPI handles the JavaScript rendering these pages require. Use ThorData residential proxies for high-volume historical analysis. Track your scraper health with ScrapeOps.

What to Build Next

  • Competitor launch tracking with alerts
  • Historical trend analysis by category
  • Automated launch day recommendations
  • Comment sentiment analysis for market research

The founders who treat Product Hunt as a data problem consistently outperform those who wing it. Now you have the tools to join them.

Top comments (0)