Learning from failure is cheaper than experiencing it. Hundreds of founders publish post-mortems when their startups die — scraping and analyzing these accounts creates a valuable knowledge base for entrepreneurs.
Data Sources
Startup post-mortems appear on personal blogs, Medium, Hacker News, and dedicated sites. We'll build scrapers that collect and analyze them.
Post-Mortem Scraper
pip install requests beautifulsoup4 textblob pandas
from urllib.parse import quote

import requests
import pandas as pd
from bs4 import BeautifulSoup
from textblob import TextBlob
class PostMortemScraper:
    """Collects startup post-mortem stories and categorizes their failure reasons."""

    # Keyword groups used to bucket a post-mortem into failure categories.
    # Class-level constant: built once, not rebuilt on every analyze call.
    REASON_KEYWORDS = {
        "no market need": ["no market", "no demand", "nobody wanted"],
        "ran out of money": ["ran out of", "funding", "runway", "burn rate"],
        "team issues": ["co-founder", "team", "disagreement"],
        "competition": ["competitor", "competed", "outcompeted"],
        "bad timing": ["too early", "too late", "timing"],
        "product issues": ["product-market fit", "pivot", "wrong product"],
    }

    def __init__(self, api_key):
        # ScraperAPI key used by fetch_page to route requests through the proxy.
        self.api_key = api_key

    def fetch_page(self, url):
        """Fetch *url* through the ScraperAPI proxy and return the Response.

        The target URL is percent-encoded: without this, any ``&`` or ``?``
        in *url* would be parsed as extra proxy parameters and the wrong
        page (or an error) would be fetched.
        """
        proxy = (
            "http://api.scraperapi.com"
            f"?api_key={self.api_key}&url={quote(url, safe='')}"
        )
        return requests.get(proxy, timeout=30)

    def scrape_hn_postmortems(self, pages=3):
        """Search Hacker News (Algolia API) for startup post-mortem stories.

        Args:
            pages: number of result pages to walk (default 3).

        Returns:
            List of dicts with title/url/points/comments/source keys.

        Raises:
            requests.HTTPError: if the Algolia API returns an error status.
        """
        posts = []
        keywords = ("post-mortem", "postmortem", "failed", "shutdown")
        for page in range(1, pages + 1):
            url = (
                "https://hn.algolia.com/api/v1/search"
                f"?query=post-mortem+startup&tags=story&page={page}"
            )
            resp = requests.get(url, timeout=15)
            # Fail loudly on an error status instead of json-parsing an error body.
            resp.raise_for_status()
            for hit in resp.json().get("hits", []):
                # Algolia may return an explicit null title; `or ""` covers
                # both a missing key and a None value before .lower().
                title = hit.get("title") or ""
                if any(kw in title.lower() for kw in keywords):
                    posts.append({
                        "title": title,
                        "url": hit.get("url", ""),
                        "points": hit.get("points", 0),
                        "comments": hit.get("num_comments", 0),
                        "source": "hackernews",
                    })
        return posts

    def analyze_postmortem(self, text):
        """Analyze one post-mortem's text.

        Returns a dict with:
            sentiment: TextBlob polarity rounded to 2 decimals.
            failure_reasons: matched categories, in REASON_KEYWORDS order
                (deterministic — the old list(set(...)) scrambled it).
            word_count: whitespace-delimited token count.
        """
        lowered = text.lower()  # hoisted: lower once, not once per category
        reasons = [
            reason
            for reason, kws in self.REASON_KEYWORDS.items()
            if any(kw in lowered for kw in kws)
        ]
        return {
            "sentiment": round(TextBlob(text).sentiment.polarity, 2),
            "failure_reasons": reasons,
            "word_count": len(text.split()),
        }
# Usage: collect HN post-mortems, then dig into the highest-voted one.
scraper = PostMortemScraper("YOUR_SCRAPERAPI_KEY")
posts = scraper.scrape_hn_postmortems(pages=3)
print(f"Found {len(posts)} post-mortems")
if posts:
    best = max(posts, key=lambda p: p["points"])
    # Cap the fetched page at 5000 chars to keep analysis fast.
    page_text = scraper.fetch_page(best["url"]).text[:5000]
    report = scraper.analyze_postmortem(page_text)
    print(f"Top post: {best['title']}")
    print(f"Failure reasons: {', '.join(report['failure_reasons'])}")
Building the Knowledge Base
def build_knowledge_base(scraper, posts, limit=20):
    """Fetch, analyze, and tabulate post-mortems into a knowledge base.

    Args:
        scraper: object exposing ``fetch_page(url)`` (response with ``.text``)
            and ``analyze_postmortem(text)`` (dict), e.g. PostMortemScraper.
        posts: list of post dicts; entries without a truthy "url" are skipped.
        limit: maximum number of posts to process (default 20, matching the
            previously hard-coded cap).

    Returns:
        pandas DataFrame with one row per analyzed post (post fields merged
        with analysis fields). Empty DataFrame if nothing was analyzable.
    """
    kb = []
    for post in posts[:limit]:
        if not post.get("url"):
            continue
        # Cap page text at 5000 chars to bound analysis cost.
        content = scraper.fetch_page(post["url"]).text[:5000]
        analysis = scraper.analyze_postmortem(content)
        kb.append({**post, **analysis})
    df = pd.DataFrame(kb)
    if df.empty:
        # No rows means no "failure_reasons" column; the old code raised
        # KeyError here. Return the empty frame without printing a summary.
        return df
    all_reasons = [r for reasons in df["failure_reasons"] for r in reasons]
    print("Top failure reasons:")
    print(pd.Series(all_reasons).value_counts().to_string())
    return df
Scaling
Blog scraping hits many different sites with varying anti-bot measures. ScraperAPI handles this diversity. For large-scale collection, ThorData proxies ensure reliable access. Monitor collection progress with ScrapeOps.
Conclusion
A post-mortem aggregator turns scattered failure stories into structured wisdom. The NLP analysis automatically categorizes failure reasons, making it easy to spot patterns. Whether you're a founder, investor, or researcher, this data helps you avoid well-documented mistakes.
Top comments (0)