Hacker News "Who is Hiring?" threads are goldmines for job market intelligence. Posted monthly, they contain hundreds of real job listings from startups and tech companies. Here's how to build a monitoring system that extracts hiring signals.
The Data Opportunity
Each monthly thread gets 500-1000+ comments, each one a real company posting real roles. This data reveals which technologies are in demand, salary trends, remote work patterns, and which companies are scaling.
Setup
pip install requests beautifulsoup4 pandas
Fetching HN Job Threads
Hacker News has a free API — no authentication needed:
import requests
import re
from datetime import datetime
def get_monthly_hiring_threads(months=6):
    """Return metadata for the most recent monthly "Who is hiring?" threads.

    Queries the Algolia HN Search API (no authentication needed) for
    stories posted by the `whoishiring` account.

    Args:
        months: Number of monthly threads to fetch (one per month).

    Returns:
        A list of dicts with keys "id", "title", and "date" (ISO timestamp).

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the API does not respond within 30 seconds.
    """
    search_url = "https://hn.algolia.com/api/v1/search"
    params = {
        "query": "Ask HN: Who is hiring?",
        "tags": "story,author_whoishiring",
        "hitsPerPage": months,
    }
    # A timeout keeps the monitor from hanging forever on a stalled
    # connection; raise_for_status surfaces HTTP errors before we try
    # to decode the body as JSON.
    response = requests.get(search_url, params=params, timeout=30)
    response.raise_for_status()
    threads = response.json()["hits"]
    return [
        {"id": t["objectID"], "title": t["title"], "date": t["created_at"]}
        for t in threads
    ]
# Fetch the six most recent monthly threads and print a one-line summary
# of each: date, title, and the story ID used for later extraction.
threads = get_monthly_hiring_threads(6)
for thread in threads:
    print(f"{thread['date'][:10]}: {thread['title']} (ID: {thread['id']})")
Extracting Job Listings
Each top-level comment is a job posting:
def extract_job_listings(thread_id):
    """Parse every top-level comment of a hiring thread into a job listing.

    Args:
        thread_id: The HN story ID of a monthly hiring thread.

    Returns:
        A list of listing dicts (the fields from parse_job_posting plus
        "thread_id", "comment_id", and "posted_at").

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the API does not respond within 30 seconds.
    """
    url = f"https://hn.algolia.com/api/v1/items/{thread_id}"
    # Timeout + status check: fail fast instead of hanging or decoding
    # an HTML error page as JSON.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    thread = response.json()
    listings = []
    for comment in thread.get("children", []):
        # Deleted/empty comments have no usable "text" field — skip them.
        # Single .get() avoids the original's double dict lookup.
        text = comment.get("text")
        if not text:
            continue
        listing = parse_job_posting(text)
        listing["thread_id"] = thread_id
        listing["comment_id"] = comment["id"]
        listing["posted_at"] = comment.get("created_at", "")
        listings.append(listing)
    return listings
def parse_job_posting(html_text):
    """Extract structured fields from a single HN job-posting comment.

    Args:
        html_text: Raw HTML of the comment body.

    Returns:
        A dict with keys "company", "remote" (bool), "technologies"
        (list of matched tech names), "salary_range" (matched string or
        None), and "full_text" (first 500 chars of the plain text).
    """
    from bs4 import BeautifulSoup
    text = BeautifulSoup(html_text, "html.parser").get_text()
    # Convention in these threads: "Company | Location | Role | ..." on
    # the first line, so the company is everything before the first pipe.
    lines = text.strip().split("\n")
    company = lines[0].split("|")[0].strip() if lines else ""
    # Detect remote work
    remote = bool(re.search(r"\b(remote|distributed|anywhere)\b", text, re.I))
    # Extract technologies
    tech_patterns = [
        "Python", "JavaScript", "TypeScript", "React", "Node", "Go", "Rust",
        "Java", "Kotlin", "Swift", "Ruby", "Rails", "Django", "FastAPI",
        "AWS", "GCP", "Azure", "Kubernetes", "Docker", "PostgreSQL"
    ]
    techs = []
    for tech in tech_patterns:
        # "Go" is also a common English word ("go to our site"), so match
        # it case-sensitively; everything else stays case-insensitive.
        # re.escape keeps future entries like "C++" from breaking the regex.
        flags = 0 if tech == "Go" else re.I
        if re.search(rf"\b{re.escape(tech)}\b", text, flags):
            techs.append(tech)
    # Extract salary ranges like "$150k - $180k" or "$150,000-$180,000"
    salary_match = re.search(r"\$([\d,]+)k?\s*[-–]\s*\$?([\d,]+)k?", text)
    salary_range = salary_match.group(0) if salary_match else None
    return {
        "company": company,
        "remote": remote,
        "technologies": techs,
        "salary_range": salary_range,
        "full_text": text[:500]
    }
Building the Monitor
import pandas as pd
from collections import Counter
def analyze_hiring_trends(months=3):
    """Aggregate listings across recent threads and print trend summaries.

    Fetches the last `months` hiring threads, parses every listing, and
    prints: the top technologies by listing count, the share of
    remote-friendly posts, and how many listings include salary info.

    Args:
        months: Number of monthly threads to include.

    Returns:
        A pandas DataFrame with one row per listing (empty if no
        listings were found).
    """
    threads = get_monthly_hiring_threads(months)
    all_listings = []
    for thread in threads:
        listings = extract_job_listings(thread["id"])
        # Tag each listing with its thread's date so month-over-month
        # trends can be computed from the combined frame.
        for listing in listings:
            listing["thread_date"] = thread["date"]
        all_listings.extend(listings)
    # Guard the empty case: with no rows, the percentage math below would
    # divide by zero and the column lookups would raise KeyError.
    if not all_listings:
        print("No listings found.")
        return pd.DataFrame()
    df = pd.DataFrame(all_listings)
    # Technology demand ranking: flatten the per-listing tech lists and
    # count occurrences across all listings.
    all_techs = [t for techs in df["technologies"] for t in techs]
    tech_counts = Counter(all_techs).most_common(15)
    print("\nTop Technologies in Demand:")
    for tech, count in tech_counts:
        pct = count / len(df) * 100
        print(f"  {tech}: {count} listings ({pct:.1f}%)")
    # Remote work trend: mean of a boolean column is the True fraction.
    remote_pct = df["remote"].mean() * 100
    print(f"\nRemote-friendly: {remote_pct:.1f}% of listings")
    # Salary analysis
    salary_df = df[df["salary_range"].notna()]
    print(f"Listings with salary info: {len(salary_df)} ({len(salary_df)/len(df)*100:.1f}%)")
    return df
# Run the analysis over the three most recent monthly threads.
df = analyze_hiring_trends(3)
Setting Up Alerts
Get notified when specific conditions appear:
def check_hiring_alerts(df, alerts):
    """Scan parsed listings for user-defined alert conditions.

    Args:
        df: DataFrame of listings with "technologies" and "company" columns.
        alerts: List of dicts, each with "type" ("technology" or
            "company") and "value".

    Returns:
        A list of {"alert": <alert dict>, "listing": <matching row>}
        entries, one per (row, alert) match.
    """
    hits = []
    for _, listing in df.iterrows():
        for rule in alerts:
            kind = rule["type"]
            if kind == "technology":
                # Exact (case-sensitive) membership in the tech list.
                matched = rule["value"] in listing["technologies"]
            elif kind == "company":
                # Case-insensitive substring match on the company name.
                matched = rule["value"].lower() in listing["company"].lower()
            else:
                # Unknown alert types are silently skipped.
                matched = False
            if matched:
                hits.append({"alert": rule, "listing": listing})
    return hits
# Watch for two hot technologies plus one target company.
my_alerts = [
    {"type": "technology", "value": tech} for tech in ("Rust", "FastAPI")
] + [
    {"type": "company", "value": "Stripe"}
]

matches = check_hiring_alerts(df, my_alerts)
print(f"Found {len(matches)} matching listings")
Recommended Tools
- ScraperAPI for handling rate limits when scraping at scale
- ThorData for reliable proxy rotation
- ScrapeOps for monitoring your scraper's performance
Conclusion
Monitoring HN hiring threads gives you a real-time pulse on the tech job market. The data is public, structured enough to parse, and refreshed monthly. Build the monitor once, and you'll have an ongoing feed of hiring intelligence that most people overlook.
Top comments (0)