Every month, HN publishes a "Who's Hiring?" thread with hundreds of real job postings. This data reveals what technologies companies actually use and what skills they pay for.
Fetching via the HN API
import requests, re, time
from collections import Counter
class HNJobScraper:
    """Scraper for monthly "Who is hiring?" threads on Hacker News."""

    def __init__(self):
        # Base URL of the official Firebase-backed Hacker News API.
        self.api = "https://hacker-news.firebaseio.com/v0"
def find_threads(self, months=6):
    """Return up to *months* item ids of recent "Who is hiring?" threads.

    Scans the 50 most recent submissions of the ``whoishiring`` account
    and keeps the ones whose title contains "who is hiring".
    """
    profile = requests.get(f"{self.api}/user/whoishiring.json").json()
    thread_ids = []
    for submission_id in profile["submitted"][:50]:
        item = requests.get(f"{self.api}/item/{submission_id}.json").json()
        if item and "who is hiring" in item.get("title", "").lower():
            thread_ids.append(submission_id)
            if len(thread_ids) >= months:
                break
        time.sleep(0.2)  # throttle requests to be polite to the API
    return thread_ids
def scrape_thread(self, tid):
    """Fetch thread *tid* and return parsed dicts for its top-level comments.

    Deleted comments are skipped; each surviving comment's HTML body is
    run through :meth:`parse`.
    """
    thread = requests.get(f"{self.api}/item/{tid}.json").json()
    parsed_posts = []
    for comment_id in thread.get("kids", []):
        comment = requests.get(f"{self.api}/item/{comment_id}.json").json()
        if comment and not comment.get("deleted"):
            parsed_posts.append(self.parse(comment.get("text", "")))
        time.sleep(0.1)  # throttle per-comment fetches
    return parsed_posts
Parsing Postings
def parse(self, html):
    """Extract structured features from one job posting's HTML body.

    Parameters
    ----------
    html : str | None
        Raw HTML of an HN comment (as returned by the item API's
        ``text`` field). ``None`` is tolerated and treated as empty.

    Returns
    -------
    dict with keys:
        "technologies": list[str] of matched tech names,
        "remote": bool,
        "salary": {"low": int, "high": int} or None (whole dollars).
    """
    # HN comment bodies use simple markup (<p>, <a>, <i>), so a
    # tag-stripping regex plus entity decoding is sufficient — this
    # removes the previous hard dependency on BeautifulSoup.
    from html import unescape  # stdlib

    text = unescape(re.sub(r"<[^>]+>", " ", html or ""))

    post = {"technologies": [], "remote": False, "salary": None}

    # Remote-friendly flag (any of these words anywhere in the post).
    if re.search(r"\b(remote|wfh|hybrid)\b", text, re.I):
        post["remote"] = True

    # Salary range, e.g. "$100k - $150k" or "$100,000-$150,000".
    m = re.search(r"\$([\d,]+)[kK]?\s*[-\u2013]\s*\$?([\d,]+)[kK]?", text)
    if m:
        low = int(m.group(1).replace(",", ""))
        high = int(m.group(2).replace(",", ""))
        # "k"-suffixed amounts parse as e.g. 100; normalize anything
        # under 1000 to whole dollars.
        if low < 1000:
            low *= 1000
        if high < 1000:
            high *= 1000
        post["salary"] = {"low": low, "high": high}

    # NOTE(review): r"\bgo\b" also matches the English word "go", so the
    # Go count is likely inflated — consider matching only "golang".
    techs = {"Python": r"\bpython\b", "Rust": r"\brust\b", "Go": r"\bgolang\b|\bgo\b",
             "TypeScript": r"\btypescript\b", "React": r"\breact\b", "PostgreSQL": r"\bpostgres",
             "K8s": r"\bk8s\b|\bkubernetes\b", "AWS": r"\baws\b", "Docker": r"\bdocker\b",
             "AI/ML": r"\b(machine learning|ml|ai|llm|gpt)\b"}
    for name, pattern in techs.items():
        if re.search(pattern, text, re.I):
            post["technologies"].append(name)
    return post
Analyzing Trends
def analyze(self, posts):
    """Print aggregate hiring stats for a list of parsed posts.

    Parameters
    ----------
    posts : list[dict]
        Dicts as produced by :meth:`parse` (keys "technologies",
        "remote", "salary").

    Prints total/remote counts, the 15 most common technologies, and
    average salary bounds. Safe to call with an empty list — the
    original divided by ``len(posts)``, which raised ZeroDivisionError
    when no posts were scraped.
    """
    if not posts:
        print("No posts to analyze.")
        return
    tech_counts = Counter()
    salaries = []
    remote = 0
    for p in posts:
        tech_counts.update(p["technologies"])
        if p["salary"]:
            salaries.append(p["salary"])
        if p["remote"]:
            remote += 1
    n = len(posts)
    print(f"Total: {n} | Remote: {remote} ({remote/n*100:.0f}%)")
    for tech, count in tech_counts.most_common(15):
        print(f" {tech:15s} {count:4d} ({count/n*100:.1f}%)")
    if salaries:
        avg_low = sum(s["low"] for s in salaries) / len(salaries)
        avg_high = sum(s["high"] for s in salaries) / len(salaries)
        print(f"Avg salary: ${avg_low:,.0f} - ${avg_high:,.0f}")
if __name__ == "__main__":
    # Entry-point guard: without it, merely importing this module would
    # kick off a multi-minute scrape of six monthly threads.
    scraper = HNJobScraper()
    all_posts = []
    for thread_id in scraper.find_threads(6):
        all_posts.extend(scraper.scrape_thread(thread_id))
        time.sleep(1)  # pause between threads to stay polite
    scraper.analyze(all_posts)
Scaling
Use ScraperAPI for reliable requests, ScrapeOps for monitoring, and ThorData for career page proxies.
Insights
- Technology momentum quarter over quarter
- Real salary benchmarks by stack and location
- Remote work trend tracking
- Startup hiring patterns revealed through job posts
Top comments (0)