Tech layoffs make headlines, but tracking them systematically reveals patterns that news articles miss. This guide shows you how to build a scraper that monitors layoff announcements and analyzes workforce trends.
Why Track Layoffs Programmatically?
Manual tracking is slow and incomplete. A scraper can monitor dozens of sources simultaneously, extract structured data from announcements, and build a dataset that reveals industry patterns.
Setup
pip install requests beautifulsoup4 pandas
Scraping Layoff Data Sources
Start with structured layoff tracking sites:
import re
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup
def scrape_layoff_tracker():
    """Scrape the layoffs.fyi tracker table into a DataFrame.

    Fetches the page through ScraperAPI (with JS rendering, since the
    tracker table is rendered client-side) and parses the first <table>.

    Returns:
        pandas.DataFrame with one row per layoff event and columns
        company, date, number_affected, percentage, industry, source.
        Empty DataFrame when no table is found.
    """
    params = {
        "api_key": "YOUR_SCRAPERAPI_KEY",
        "url": "https://layoffs.fyi/",
        "render": "true",  # the table is built by JS; ask the proxy to render
    }
    response = requests.get("https://api.scraperapi.com", params=params)
    # Fail loudly on HTTP errors instead of silently parsing an error page
    # into an empty dataset.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    layoffs = []
    table = soup.select_one("table")
    if table:
        for row in table.select("tbody tr"):
            cells = [td.text.strip() for td in row.select("td")]
            # Rows with fewer than 4 cells are headers/ads/malformed — skip.
            if len(cells) >= 4:
                layoffs.append({
                    "company": cells[0],
                    "date": cells[1],
                    "number_affected": parse_number(cells[2]),
                    # len(cells) >= 4 is guaranteed here, so cells[3] is safe.
                    "percentage": cells[3],
                    "industry": cells[4] if len(cells) > 4 else "",
                    "source": cells[-1] if len(cells) > 5 else "",
                })
    return pd.DataFrame(layoffs)
def parse_number(text):
    """Parse the first integer out of free text, e.g. "1,200 employees" -> 1200.

    Thousands separators are stripped before matching so "1,200" reads as a
    single number. Returns 0 when *text* contains no digits.
    """
    # Commas are already removed, so a plain \d+ run is sufficient
    # (the original [\d,]+ class could never match a comma here).
    digit_runs = re.findall(r"\d+", text.replace(",", ""))
    return int(digit_runs[0]) if digit_runs else 0
# Pull the current tracker table once and report how many events we captured.
df = scrape_layoff_tracker()
print(f"Tracked {len(df)} layoff events")
Monitoring News Sources
Supplement tracker data with real-time news scraping:
def scrape_layoff_news(query="tech layoffs 2026"):
    """Scrape Google News results for *query* and extract layoff facts.

    Args:
        query: search phrase; may contain spaces (it is URL-encoded here).

    Returns:
        List of dicts with keys title, source, time (ISO string or ""),
        company (best-effort extraction), estimated_count (0 if the
        headline mentions no number).
    """
    params = {
        "api_key": "YOUR_SCRAPERAPI_KEY",
        # quote_plus: the query contains spaces — without encoding, ScraperAPI
        # would receive a malformed target URL after decoding its `url` param.
        "url": f"https://news.google.com/search?q={quote_plus(query)}&hl=en-US",
        "render": "true",
    }
    response = requests.get("https://api.scraperapi.com", params=params)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    articles = []
    for item in soup.select("article"):
        # NOTE(review): class names like JtKRv / vr1PYe are Google-generated
        # and change without notice — re-verify selectors when results go empty.
        title_el = item.select_one("a.JtKRv")
        source_el = item.select_one(".vr1PYe")
        time_el = item.select_one("time")
        if not title_el:
            continue  # not a headline card; skip
        title = title_el.text.strip()
        articles.append({
            "title": title,
            "source": source_el.text.strip() if source_el else "",
            "time": time_el.get("datetime", "") if time_el else "",
            "company": extract_company(title),
            "estimated_count": extract_layoff_count(title),
        })
    return articles
def extract_company(headline):
    """Best-effort company-name extraction from a news headline.

    Checks a case-insensitive list of well-known companies first and
    returns the canonical spelling on a hit; otherwise falls back to the
    headline's first word. Returns "" for an empty/whitespace headline
    (the bare split()[0] previously raised IndexError).
    """
    known_companies = ["Google", "Meta", "Amazon", "Microsoft", "Apple",
                       "Tesla", "Netflix", "Salesforce", "IBM", "Intel"]
    lowered = headline.lower()  # hoisted: lower the headline once, not per company
    for company in known_companies:
        if company.lower() in lowered:
            return company
    words = headline.split()
    return words[0] if words else ""
def extract_layoff_count(text):
    """Return the layoff headcount mentioned in *text*, or 0 if none.

    Tries several common headline phrasings in order, e.g.
    "1,200 employees", "cuts 500", "300 layoffs".
    """
    count_patterns = (
        r"(\d[\d,]*)\s*(?:employees|workers|jobs|staff|people)",
        r"(?:cuts?|lays? off|fires?)\s*(\d[\d,]*)",
        r"(\d[\d,]*)\s*(?:layoffs|cuts)",
    )
    for candidate in count_patterns:
        if hit := re.search(candidate, text, re.I):
            return int(hit.group(1).replace(",", ""))
    return 0
# Fetch the latest headlines matching the default query.
news = scrape_layoff_news()
print(f"Found {len(news)} recent articles")
Analyzing Layoff Trends
def analyze_trends(df):
    """Print monthly and per-company layoff summaries; return monthly stats.

    Args:
        df: DataFrame with at least "company", "date" (parseable string or
            datetime) and "number_affected" columns. Rows whose date fails
            to parse are dropped.

    Returns:
        DataFrame indexed by month-end with columns "events" (event count)
        and "number_affected" (total headcount).
    """
    # Work on a copy: the original assigned df["date"] in place and thereby
    # mutated the caller's DataFrame.
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])

    # Monthly aggregation. "ME" (month-end) is the modern alias; pandas
    # older than 2.2 only understands "M", so fall back on ValueError.
    indexed = df.set_index("date")
    try:
        resampled = indexed.resample("ME")
    except ValueError:
        resampled = indexed.resample("M")
    monthly = resampled.agg({
        "company": "count",
        "number_affected": "sum",
    }).rename(columns={"company": "events"})

    print("\nMonthly Layoff Summary:")
    for month_end, row in monthly.tail(6).iterrows():
        print(f"  {month_end.strftime('%Y-%m')}: {row['events']} events, "
              f"{row['number_affected']:,} affected")

    # Top companies by total layoffs across the whole dataset.
    top = df.groupby("company")["number_affected"].sum().nlargest(10)
    print("\nTop Companies by Total Layoffs:")
    for company, count in top.items():
        print(f"  {company}: {count:,}")
    return monthly
# Run the trend analysis on the scraped tracker data.
trends = analyze_trends(df)
Setting Up Automated Monitoring
import json
from datetime import datetime
def daily_layoff_check():
    """Scrape current layoff data and report events not seen on the last run.

    State is persisted in last_check.json (the list of previously seen
    company names plus a timestamp). Intended to be run once per day,
    e.g. from cron or a task scheduler.
    """
    # Scrape latest data. The news pull is kept for parity with the rest of
    # the pipeline; only the tracker feed is diffed below.
    tracker = scrape_layoff_tracker()
    news = scrape_layoff_news()

    # Load the previous run's state; treat a missing OR corrupt state file
    # as a first run (the original crashed on invalid JSON).
    try:
        with open("last_check.json") as f:
            previous = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        previous = {"companies": []}

    # .get() guards against a state file written by an older/other tool
    # that lacks the "companies" key.
    seen_companies = previous.get("companies", [])
    new_entries = tracker[~tracker["company"].isin(seen_companies)]
    if len(new_entries) > 0:
        print(f"\n{len(new_entries)} new layoff events detected:")
        for _, row in new_entries.iterrows():
            print(f"  {row['company']}: {row['number_affected']} affected")

    # Save current state for the next run.
    with open("last_check.json", "w") as f:
        json.dump({"companies": tracker["company"].tolist(),
                   "last_check": datetime.now().isoformat()}, f)
# Entry point for the scheduled daily run.
daily_layoff_check()
Scaling Tips
- Use ScraperAPI to handle news sites with heavy anti-bot protections
- ThorData residential proxies prevent rate limiting across multiple sources
- Monitor scraper reliability with ScrapeOps
Conclusion
A systematic layoff tracker reveals patterns invisible to casual news readers — seasonal trends, industry correlations, and early warning signals. Build incrementally, validate against known events, and you'll have a unique dataset for market analysis and career planning.
Top comments (0)