When a startup suddenly posts 15 engineering roles, something is happening — new funding, product pivot, or scaling push. Scraping VC portfolio job boards reveals these signals before press releases.
The Signal in Job Postings
Major VCs like a16z, Sequoia, and Y Combinator maintain job boards aggregating positions across their portfolio companies. Changes in hiring patterns are leading indicators of strategic shifts.
Scraping YC's Work at a Startup
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
# ScraperAPI credential used by both scrapers below; replace the placeholder
# with your own key before running.
API_KEY = "YOUR_SCRAPERAPI_KEY" # Get one at https://www.scraperapi.com?fp_ref=the52
def scrape_yc_jobs():
    """Scrape job listings from Y Combinator's Work at a Startup board.

    Fetches the page through the ScraperAPI proxy with JavaScript rendering
    enabled, then parses job cards out of the rendered HTML.

    Returns:
        list[dict]: one dict per job with keys 'source', 'company', 'title',
        'location', and 'scraped_at' (local-time ISO timestamp).

    Raises:
        requests.HTTPError: if the proxy returns an error status
        (bad key, exhausted quota, upstream failure).
    """
    url = "https://www.workatastartup.com/jobs"
    # Pass the target URL via `params` so requests URL-encodes it. Embedding
    # it raw in an f-string breaks as soon as the target URL carries its own
    # query string (everything after '&' would be read as proxy parameters).
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": API_KEY, "url": url, "render": "true"},
        timeout=60,
    )
    # Fail loudly instead of silently parsing a proxy error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    jobs = []
    # Selectors are intentionally loose ([class*="job"]) because the board's
    # CSS class names are generated and not stable across deploys.
    for card in soup.select('.job-card, .job-listing, [class*="job"]'):
        company = card.select_one('.company-name, [class*="company"]')
        title = card.select_one('.job-title, [class*="title"]')
        location = card.select_one('.location, [class*="location"]')
        if title:  # a card without a recognizable title node is noise
            jobs.append({
                'source': 'YC',
                'company': company.text.strip() if company else 'Unknown',
                'title': title.text.strip(),
                'location': location.text.strip() if location else 'Remote',
                'scraped_at': datetime.now().isoformat(),
            })
    return jobs
def scrape_a16z_jobs():
    """Scrape job listings from the a16z portfolio job board.

    Same pattern as scrape_yc_jobs(): fetch through ScraperAPI with JS
    rendering, parse loosely-matched job cards.

    Returns:
        list[dict]: one dict per job with keys 'source', 'company', 'title',
        'location' (always 'N/A' — the board doesn't expose it on the card),
        and 'scraped_at'.

    Raises:
        requests.HTTPError: if the proxy returns an error status.
    """
    url = "https://jobs.a16z.com/"
    # URL-encode the target via `params` (same fix as scrape_yc_jobs) rather
    # than interpolating it into the proxy query string by hand.
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": API_KEY, "url": url, "render": "true"},
        timeout=60,
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    jobs = []
    for card in soup.select('.job-card, .position, [class*="job"]'):
        company = card.select_one('.company, [class*="company"]')
        title = card.select_one('.title, [class*="title"]')
        if title:
            jobs.append({
                'source': 'a16z',
                'company': company.text.strip() if company else 'Unknown',
                'title': title.text.strip(),
                'location': 'N/A',
                'scraped_at': datetime.now().isoformat(),
            })
    return jobs
# Run both scrapers, then fold everything into a single DataFrame for the
# analysis steps below.
yc_jobs = scrape_yc_jobs()
a16z_jobs = scrape_a16z_jobs()
all_jobs = [*yc_jobs, *a16z_jobs]
df = pd.DataFrame(all_jobs)
print("Total jobs scraped: " + str(len(df)))
Analyzing Hiring Signals
def analyze_hiring_signals(df):
    """Summarize hiring activity in a scraped jobs DataFrame.

    Prints the top hiring companies and a rough breakdown of openings by role
    category (matched by keyword substring in the job title).

    Args:
        df: DataFrame with at least 'source', 'company', and 'title' columns.

    Returns:
        DataFrame with columns ('source', 'company', 'open_roles'), sorted by
        open_roles descending.
    """
    company_counts = (
        df.groupby(['source', 'company'])
        .size()
        .reset_index(name='open_roles')
        .sort_values('open_roles', ascending=False)
    )
    print("\nTop hiring companies:")
    print(company_counts.head(20).to_string(index=False))
    role_categories = {
        'engineering': ['engineer', 'developer', 'sre', 'devops', 'backend', 'frontend', 'fullstack'],
        'ai_ml': ['machine learning', 'ai', 'data scientist', 'ml engineer', 'llm'],
        'sales': ['sales', 'account executive', 'sdr', 'business development'],
        'product': ['product manager', 'product designer', 'ux'],
    }
    # Lowercase once instead of per category; keep the total out of the loop
    # and guard it so an empty DataFrame doesn't raise ZeroDivisionError.
    titles = df['title'].str.lower()
    total = len(df)
    for category, keywords in role_categories.items():
        # NOTE: substring match — 'ai' will also hit e.g. 'maintainer'.
        # Acceptable for a rough trend signal; tighten to word boundaries
        # if precision matters.
        mask = titles.apply(lambda t: any(kw in t for kw in keywords))
        count = int(mask.sum())
        pct = (count / total * 100) if total else 0.0
        print(f"{category}: {count} openings ({pct:.1f}%)")
    return company_counts
# Per-company open-role counts; also prints the category breakdown.
signals = analyze_hiring_signals(df)
Detecting Hiring Surges
Compare snapshots taken over time to detect companies that are ramping up hiring. Use ThorData if you need residential proxies:
import sqlite3
# Local SQLite store of prior scrape snapshots; shared module-level
# connection used by detect_surges() below.
conn = sqlite3.connect('startup_jobs.db')
def detect_surges(current_df, threshold=5):
    """Flag companies whose open-role count jumped versus the 7-day baseline.

    Compares per-company counts in `current_df` against rows stored in the
    module-level SQLite connection `conn`, prints any surges, then appends
    the current snapshot to the 'jobs' table so it becomes the next baseline.

    Args:
        current_df: DataFrame of this run's jobs; must have a 'company'
            column (and match the 'jobs' table schema for the append).
        threshold: minimum increase in open roles to count as a surge.

    Returns:
        DataFrame of surging companies with columns
        ('company', 'current', 'previous', 'change'), sorted by change desc.
    """
    current_counts = current_df.groupby('company').size().reset_index(name='current')
    # On the very first run the 'jobs' table doesn't exist yet, and
    # pd.read_sql would raise — check sqlite_master and fall back to an
    # empty baseline instead of crashing.
    table_exists = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='jobs'"
    ).fetchone()
    if table_exists:
        previous = pd.read_sql('''
SELECT company, COUNT(*) as previous FROM jobs
WHERE scraped_at > datetime('now', '-7 days')
GROUP BY company
''', conn)
    else:
        previous = pd.DataFrame(columns=['company', 'previous'])
    merged = current_counts.merge(previous, on='company', how='left').fillna(0)
    merged['change'] = merged['current'] - merged['previous']
    surges = merged[merged['change'] >= threshold].sort_values('change', ascending=False)
    if len(surges) > 0:
        print("\nHIRING SURGES DETECTED:")
        print(surges.to_string(index=False))
    # Persist this snapshot so it becomes part of the next run's baseline.
    current_df.to_sql('jobs', conn, if_exists='append', index=False)
    return surges
# Compare this snapshot against the stored baseline and persist it.
surges = detect_surges(df)
Tracking Technology Trends
# Count how many job titles mention each technology keyword.
tech_keywords = ['rust', 'golang', 'kubernetes', 'terraform', 'react', 'typescript',
                 'python', 'pytorch', 'llm', 'rag', 'vector database', 'graphql']
# Lowercase the titles once instead of per keyword; regex=False makes the
# match a literal substring test so keywords are never misread as regex
# patterns (e.g. a future 'c++' entry would otherwise break).
# NOTE: substring matching — 'rust' also hits 'trust', 'rag' hits 'storage';
# fine for a rough trend signal.
_titles_lower = df['title'].str.lower()
tech_demand = {}
for tech in tech_keywords:
    count = int(_titles_lower.str.contains(tech, regex=False).sum())
    if count > 0:
        tech_demand[tech] = count
tech_df = pd.DataFrame(list(tech_demand.items()), columns=['technology', 'mentions'])
tech_df = tech_df.sort_values('mentions', ascending=False)
print("\nTechnology demand in VC-backed startups:")
print(tech_df.to_string(index=False))
Use Cases
- Investors: Spot portfolio companies scaling before announcements
- Job seekers: Find fast-growing startups hiring in your stack
- Recruiters: Identify companies competing for the same talent
- Analysts: Track sector-level hiring trends across VC portfolios
Monitor with ScrapeOps and use ScraperAPI for JavaScript-heavy job board rendering.
Top comments (0)