Learning from failure is cheaper than experiencing it. Hundreds of founders publish post-mortems when their startups die — scraping and analyzing these accounts creates a valuable knowledge base for entrepreneurs.
Data Sources
Startup post-mortems appear on personal blogs, Medium, Hacker News, and dedicated sites. We'll build scrapers that collect and analyze them.
Post-Mortem Scraper
pip install requests beautifulsoup4 textblob pandas
from urllib.parse import quote

import requests
import pandas as pd
from bs4 import BeautifulSoup
from textblob import TextBlob
class PostMortemScraper:
    """Collects startup post-mortem stories and categorizes their failure reasons."""

    # Keyword groups used to bucket a post-mortem into failure categories.
    # Class-level constant: built once, not rebuilt on every analyze call.
    REASON_KEYWORDS = {
        "no market need": ["no market", "no demand", "nobody wanted"],
        "ran out of money": ["ran out of", "funding", "runway", "burn rate"],
        "team issues": ["co-founder", "team", "disagreement"],
        "competition": ["competitor", "competed", "outcompeted"],
        "bad timing": ["too early", "too late", "timing"],
        "product issues": ["product-market fit", "pivot", "wrong product"],
    }

    def __init__(self, api_key):
        # ScraperAPI key used by fetch_page to route requests through the proxy.
        self.api_key = api_key

    def fetch_page(self, url):
        """Fetch *url* through the ScraperAPI proxy and return the Response.

        The target URL is percent-encoded: without this, any ``&`` or ``?``
        in *url* would be parsed as extra proxy parameters and the wrong
        page (or an error) would be fetched.
        """
        proxy = (
            "http://api.scraperapi.com"
            f"?api_key={self.api_key}&url={quote(url, safe='')}"
        )
        return requests.get(proxy, timeout=30)

    def scrape_hn_postmortems(self, pages=3):
        """Search Hacker News (Algolia API) for startup post-mortem stories.

        Args:
            pages: number of result pages to walk (default 3).

        Returns:
            List of dicts with title/url/points/comments/source keys.

        Raises:
            requests.HTTPError: if the Algolia API returns an error status.
        """
        posts = []
        keywords = ("post-mortem", "postmortem", "failed", "shutdown")
        for page in range(1, pages + 1):
            url = (
                "https://hn.algolia.com/api/v1/search"
                f"?query=post-mortem+startup&tags=story&page={page}"
            )
            resp = requests.get(url, timeout=15)
            # Fail loudly on an error status instead of json-parsing an error body.
            resp.raise_for_status()
            for hit in resp.json().get("hits", []):
                # Algolia may return an explicit null title; `or ""` covers
                # both a missing key and a None value before .lower().
                title = hit.get("title") or ""
                if any(kw in title.lower() for kw in keywords):
                    posts.append({
                        "title": title,
                        "url": hit.get("url", ""),
                        "points": hit.get("points", 0),
                        "comments": hit.get("num_comments", 0),
                        "source": "hackernews",
                    })
        return posts

    def analyze_postmortem(self, text):
        """Analyze one post-mortem's text.

        Returns a dict with:
            sentiment: TextBlob polarity rounded to 2 decimals.
            failure_reasons: matched categories, in REASON_KEYWORDS order
                (deterministic — the old list(set(...)) scrambled it).
            word_count: whitespace-delimited token count.
        """
        lowered = text.lower()  # hoisted: lower once, not once per category
        reasons = [
            reason
            for reason, kws in self.REASON_KEYWORDS.items()
            if any(kw in lowered for kw in kws)
        ]
        return {
            "sentiment": round(TextBlob(text).sentiment.polarity, 2),
            "failure_reasons": reasons,
            "word_count": len(text.split()),
        }
# Usage: collect HN post-mortems, then dig into the highest-voted one.
scraper = PostMortemScraper("YOUR_SCRAPERAPI_KEY")
posts = scraper.scrape_hn_postmortems(pages=3)
print(f"Found {len(posts)} post-mortems")
if posts:
    best = max(posts, key=lambda p: p["points"])
    # Cap the fetched page at 5000 chars to keep analysis fast.
    page_text = scraper.fetch_page(best["url"]).text[:5000]
    report = scraper.analyze_postmortem(page_text)
    print(f"Top post: {best['title']}")
    print(f"Failure reasons: {', '.join(report['failure_reasons'])}")
Building the Knowledge Base
def build_knowledge_base(scraper, posts, limit=20):
    """Fetch, analyze, and tabulate post-mortems into a knowledge base.

    Args:
        scraper: object exposing ``fetch_page(url)`` (response with ``.text``)
            and ``analyze_postmortem(text)`` (dict), e.g. PostMortemScraper.
        posts: list of post dicts; entries without a truthy "url" are skipped.
        limit: maximum number of posts to process (default 20, matching the
            previously hard-coded cap).

    Returns:
        pandas DataFrame with one row per analyzed post (post fields merged
        with analysis fields). Empty DataFrame if nothing was analyzable.
    """
    kb = []
    for post in posts[:limit]:
        if not post.get("url"):
            continue
        # Cap page text at 5000 chars to bound analysis cost.
        content = scraper.fetch_page(post["url"]).text[:5000]
        analysis = scraper.analyze_postmortem(content)
        kb.append({**post, **analysis})
    df = pd.DataFrame(kb)
    if df.empty:
        # No rows means no "failure_reasons" column; the old code raised
        # KeyError here. Return the empty frame without printing a summary.
        return df
    all_reasons = [r for reasons in df["failure_reasons"] for r in reasons]
    print("Top failure reasons:")
    print(pd.Series(all_reasons).value_counts().to_string())
    return df
Scaling
Blog scraping hits many different sites with varying anti-bot measures. ScraperAPI handles this diversity. For large-scale collection, ThorData proxies ensure reliable access. Monitor collection progress with ScrapeOps.
Conclusion
A post-mortem aggregator turns scattered failure stories into structured wisdom. The NLP analysis automatically categorizes failure reasons, making it easy to spot patterns. Whether you're a founder, investor, or researcher, this data helps you avoid well-documented mistakes.
Top comments (0)