Tech layoffs make headlines, but tracking them systematically reveals patterns that news articles miss. This guide shows you how to build a scraper that monitors layoff announcements and analyzes workforce trends.
Why Track Layoffs Programmatically?
Manual tracking is slow and incomplete. A scraper can monitor dozens of sources simultaneously, extract structured data from announcements, and build a dataset that reveals industry patterns.
Setup
pip install requests beautifulsoup4 pandas
Scraping Layoff Data Sources
Start with structured layoff tracking sites:
import re
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup
def scrape_layoff_tracker():
    """Scrape the layoffs.fyi tracker table into a DataFrame.

    Fetches the page through ScraperAPI (with JS rendering, since the
    tracker table is rendered client-side) and parses the first <table>.

    Returns:
        pandas.DataFrame with one row per layoff event and columns
        company, date, number_affected, percentage, industry, source.
        Empty DataFrame when no table is found.
    """
    params = {
        "api_key": "YOUR_SCRAPERAPI_KEY",
        "url": "https://layoffs.fyi/",
        "render": "true",  # the table is built by JS; ask the proxy to render
    }
    response = requests.get("https://api.scraperapi.com", params=params)
    # Fail loudly on HTTP errors instead of silently parsing an error page
    # into an empty dataset.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    layoffs = []
    table = soup.select_one("table")
    if table:
        for row in table.select("tbody tr"):
            cells = [td.text.strip() for td in row.select("td")]
            # Rows with fewer than 4 cells are headers/ads/malformed — skip.
            if len(cells) >= 4:
                layoffs.append({
                    "company": cells[0],
                    "date": cells[1],
                    "number_affected": parse_number(cells[2]),
                    # len(cells) >= 4 is guaranteed here, so cells[3] is safe.
                    "percentage": cells[3],
                    "industry": cells[4] if len(cells) > 4 else "",
                    "source": cells[-1] if len(cells) > 5 else "",
                })
    return pd.DataFrame(layoffs)
def parse_number(text):
    """Parse the first integer out of free text, e.g. "1,200 employees" -> 1200.

    Thousands separators are stripped before matching so "1,200" reads as a
    single number. Returns 0 when *text* contains no digits.
    """
    # Commas are already removed, so a plain \d+ run is sufficient
    # (the original [\d,]+ class could never match a comma here).
    digit_runs = re.findall(r"\d+", text.replace(",", ""))
    return int(digit_runs[0]) if digit_runs else 0
# Pull the current tracker table once and report how many events we captured.
df = scrape_layoff_tracker()
print(f"Tracked {len(df)} layoff events")
Monitoring News Sources
Supplement tracker data with real-time news scraping:
def scrape_layoff_news(query="tech layoffs 2026"):
    """Scrape Google News results for *query* and extract layoff facts.

    Args:
        query: search phrase; may contain spaces (it is URL-encoded here).

    Returns:
        List of dicts with keys title, source, time (ISO string or ""),
        company (best-effort extraction), estimated_count (0 if the
        headline mentions no number).
    """
    params = {
        "api_key": "YOUR_SCRAPERAPI_KEY",
        # quote_plus: the query contains spaces — without encoding, ScraperAPI
        # would receive a malformed target URL after decoding its `url` param.
        "url": f"https://news.google.com/search?q={quote_plus(query)}&hl=en-US",
        "render": "true",
    }
    response = requests.get("https://api.scraperapi.com", params=params)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    articles = []
    for item in soup.select("article"):
        # NOTE(review): class names like JtKRv / vr1PYe are Google-generated
        # and change without notice — re-verify selectors when results go empty.
        title_el = item.select_one("a.JtKRv")
        source_el = item.select_one(".vr1PYe")
        time_el = item.select_one("time")
        if not title_el:
            continue  # not a headline card; skip
        title = title_el.text.strip()
        articles.append({
            "title": title,
            "source": source_el.text.strip() if source_el else "",
            "time": time_el.get("datetime", "") if time_el else "",
            "company": extract_company(title),
            "estimated_count": extract_layoff_count(title),
        })
    return articles
def extract_company(headline):
    """Best-effort company-name extraction from a news headline.

    Checks a case-insensitive list of well-known companies first and
    returns the canonical spelling on a hit; otherwise falls back to the
    headline's first word. Returns "" for an empty/whitespace headline
    (the bare split()[0] previously raised IndexError).
    """
    known_companies = ["Google", "Meta", "Amazon", "Microsoft", "Apple",
                       "Tesla", "Netflix", "Salesforce", "IBM", "Intel"]
    lowered = headline.lower()  # hoisted: lower the headline once, not per company
    for company in known_companies:
        if company.lower() in lowered:
            return company
    words = headline.split()
    return words[0] if words else ""
def extract_layoff_count(text):
    """Return the layoff headcount mentioned in *text*, or 0 if none.

    Tries several common headline phrasings in order, e.g.
    "1,200 employees", "cuts 500", "300 layoffs".
    """
    count_patterns = (
        r"(\d[\d,]*)\s*(?:employees|workers|jobs|staff|people)",
        r"(?:cuts?|lays? off|fires?)\s*(\d[\d,]*)",
        r"(\d[\d,]*)\s*(?:layoffs|cuts)",
    )
    for candidate in count_patterns:
        if hit := re.search(candidate, text, re.I):
            return int(hit.group(1).replace(",", ""))
    return 0
# Fetch the latest headlines matching the default query.
news = scrape_layoff_news()
print(f"Found {len(news)} recent articles")
Analyzing Layoff Trends
def analyze_trends(df):
    """Print monthly and per-company layoff summaries; return monthly stats.

    Args:
        df: DataFrame with at least "company", "date" (parseable string or
            datetime) and "number_affected" columns. Rows whose date fails
            to parse are dropped.

    Returns:
        DataFrame indexed by month-end with columns "events" (event count)
        and "number_affected" (total headcount).
    """
    # Work on a copy: the original assigned df["date"] in place and thereby
    # mutated the caller's DataFrame.
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])

    # Monthly aggregation. "ME" (month-end) is the modern alias; pandas
    # older than 2.2 only understands "M", so fall back on ValueError.
    indexed = df.set_index("date")
    try:
        resampled = indexed.resample("ME")
    except ValueError:
        resampled = indexed.resample("M")
    monthly = resampled.agg({
        "company": "count",
        "number_affected": "sum",
    }).rename(columns={"company": "events"})

    print("\nMonthly Layoff Summary:")
    for month_end, row in monthly.tail(6).iterrows():
        print(f"  {month_end.strftime('%Y-%m')}: {row['events']} events, "
              f"{row['number_affected']:,} affected")

    # Top companies by total layoffs across the whole dataset.
    top = df.groupby("company")["number_affected"].sum().nlargest(10)
    print("\nTop Companies by Total Layoffs:")
    for company, count in top.items():
        print(f"  {company}: {count:,}")
    return monthly
# Run the trend analysis on the scraped tracker data.
trends = analyze_trends(df)
Setting Up Automated Monitoring
import json
from datetime import datetime
def daily_layoff_check():
    """Scrape current layoff data and report events not seen on the last run.

    State is persisted in last_check.json (the list of previously seen
    company names plus a timestamp). Intended to be run once per day,
    e.g. from cron or a task scheduler.
    """
    # Scrape latest data. The news pull is kept for parity with the rest of
    # the pipeline; only the tracker feed is diffed below.
    tracker = scrape_layoff_tracker()
    news = scrape_layoff_news()

    # Load the previous run's state; treat a missing OR corrupt state file
    # as a first run (the original crashed on invalid JSON).
    try:
        with open("last_check.json") as f:
            previous = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        previous = {"companies": []}

    # .get() guards against a state file written by an older/other tool
    # that lacks the "companies" key.
    seen_companies = previous.get("companies", [])
    new_entries = tracker[~tracker["company"].isin(seen_companies)]
    if len(new_entries) > 0:
        print(f"\n{len(new_entries)} new layoff events detected:")
        for _, row in new_entries.iterrows():
            print(f"  {row['company']}: {row['number_affected']} affected")

    # Save current state for the next run.
    with open("last_check.json", "w") as f:
        json.dump({"companies": tracker["company"].tolist(),
                   "last_check": datetime.now().isoformat()}, f)
# Entry point for the scheduled daily run.
daily_layoff_check()
Scaling Tips
- Use ScraperAPI to handle news sites with heavy anti-bot protections
- ThorData residential proxies prevent rate limiting across multiple sources
- Monitor scraper reliability with ScrapeOps
Conclusion
A systematic layoff tracker reveals patterns invisible to casual news readers — seasonal trends, industry correlations, and early warning signals. Build incrementally, validate against known events, and you'll have a unique dataset for market analysis and career planning.
Top comments (0)