Job postings reveal where the economy is heading: which skills are in demand, which cities are hiring, and which industries are growing. In this tutorial, we'll scrape job listings and build an interactive heatmap.
What We'll Build
- Multi-site job listing scraper
- Geographic and skill-based analysis
- Interactive heatmap visualization
- Trend tracking over time
Setup
pip install requests beautifulsoup4 pandas folium geopy
Job Listing Scraper
import time
from datetime import datetime
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
# ScraperAPI key used to proxy requests past Indeed's anti-bot measures.
# Replace with a real key before running.
SCRAPER_API_KEY = "YOUR_KEY"
def scrape_job_listings(keyword, location="", page=1):
    """Scrape one page of Indeed job listings via ScraperAPI.

    Args:
        keyword: Search term, e.g. "python developer".
        location: City/state filter, e.g. "Austin, TX". Empty means anywhere.
        page: Zero-based results page; Indeed paginates in steps of 10.

    Returns:
        A list of dicts with "title", "company", "location", "salary",
        "keyword", and "scraped_at" keys. Cards with no title are skipped;
        missing company/location fall back to "N/A", missing salary to
        "Not listed".

    Raises:
        requests.HTTPError: If the proxy request returns an error status.
    """
    # Percent-encode the query pieces: keywords and locations contain
    # spaces and commas that would otherwise malform the URL.
    url = (
        f"https://www.indeed.com/jobs"
        f"?q={quote_plus(keyword)}&l={quote_plus(location)}&start={page * 10}"
    )
    # The target URL itself must be encoded too; if it is passed raw, its
    # '&' separators are parsed as extra ScraperAPI parameters and the
    # l/start arguments are silently dropped.
    api_url = (
        f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}"
        f"&url={quote_plus(url)}"
    )
    response = requests.get(api_url, timeout=60)
    # Fail loudly instead of parsing an error/captcha page as if it were results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    jobs = []
    for card in soup.select(".job_seen_beacon"):
        title = card.select_one(".jobTitle")
        company = card.select_one("[data-testid='company-name']")
        loc = card.select_one("[data-testid='text-location']")
        salary = card.select_one(".salary-snippet-container")
        if title:
            jobs.append({
                "title": title.text.strip(),
                "company": company.text.strip() if company else "N/A",
                "location": loc.text.strip() if loc else "N/A",
                "salary": salary.text.strip() if salary else "Not listed",
                "keyword": keyword,
                "scraped_at": datetime.now().isoformat()
            })
    return jobs
Multi-City Collection
# Metro areas surveyed; strings match the format of Indeed's "l" parameter.
CITIES = [
    "New York, NY", "San Francisco, CA", "Austin, TX",
    "Seattle, WA", "Chicago, IL", "Boston, MA",
    "Denver, CO", "Atlanta, GA", "Miami, FL",
    "Los Angeles, CA", "Portland, OR", "Nashville, TN"
]
# Search keywords; each one is scraped in every city above.
SKILLS = ["python developer", "react developer", "data engineer",
          "devops engineer", "machine learning engineer"]
def collect_job_data(pages_per_combo=2):
    """Scrape every skill/city combination and pool the results.

    Args:
        pages_per_combo: How many result pages to fetch per skill/city pair.

    Returns:
        A flat list of job dicts from all searches. Sleeps 5 seconds
        between requests to avoid hammering the proxy.
    """
    collected = []
    for search_term in SKILLS:
        for metro in CITIES:
            for page_index in range(pages_per_combo):
                print(f"Scraping: {search_term} in {metro} (page {page_index + 1})")
                batch = scrape_job_listings(search_term, metro, page_index)
                collected.extend(batch)
                time.sleep(5)
    return collected
Geocoding Locations
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

# Shared Nominatim client; a descriptive user_agent is required by the
# Nominatim usage policy.
geolocator = Nominatim(user_agent="job_heatmap")
# location string -> (lat, lon) memo, so duplicate locations are only
# geocoded once per run.
geo_cache = {}
def geocode_location(location):
    """Convert a location string to (lat, lon), or None if unresolvable.

    Results -- including failed lookups -- are memoized in geo_cache so a
    repeated location string never triggers a second network request.
    Timeouts are deliberately NOT cached, so a flaky lookup can be
    retried on a later call.
    """
    if location in geo_cache:
        return geo_cache[location]
    try:
        result = geolocator.geocode(location, timeout=10)
    except GeocoderTimedOut:
        # Transient failure: return None without caching so we can retry.
        return None
    # Cache negative results too; otherwise unresolvable strings (e.g.
    # "N/A" or "Remote") would be re-queried on every single call.
    coords = (result.latitude, result.longitude) if result else None
    geo_cache[location] = coords
    return coords
Building the Heatmap
import folium
from folium.plugins import HeatMap
import pandas as pd
def build_job_heatmap(jobs, output="job_heatmap.html"):
    """Build an interactive heatmap of job density and save it as HTML.

    Args:
        jobs: List of job dicts as produced by scrape_job_listings.
        output: Path of the HTML file to write.

    Returns:
        None. Saves a folium map to `output` (or returns early if there
        is nothing to plot).
    """
    if not jobs:
        # A DataFrame built from an empty list has no "location" column,
        # so groupby would raise KeyError -- bail out early instead.
        print("No jobs to plot.")
        return
    df = pd.DataFrame(jobs)
    location_counts = df.groupby("location").size().reset_index(name="count")

    heat_data = []
    for _, row in location_counts.iterrows():
        # "N/A" is the scraper's placeholder for a card with no location
        # element; geocoding it would waste a request or plot a bogus point.
        if row["location"] == "N/A":
            continue
        coords = geocode_location(row["location"])
        if coords:
            heat_data.append([coords[0], coords[1], row["count"]])

    # Center the map on the continental US.
    m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
    HeatMap(
        heat_data,
        min_opacity=0.3,
        radius=25,
        blur=15,
        gradient={0.4: "blue", 0.65: "lime", 1: "red"}
    ).add_to(m)
    # Add clickable markers for the ten busiest locations.
    for point in sorted(heat_data, key=lambda x: x[2], reverse=True)[:10]:
        folium.CircleMarker(
            location=[point[0], point[1]],
            radius=point[2] / 5,
            popup=f"Jobs: {point[2]}",
            color="red",
            fill=True
        ).add_to(m)
    m.save(output)
    print(f"Heatmap saved to {output}")
# Run the full pipeline only when executed as a script, so importing this
# module for its functions does not kick off hours of scraping.
if __name__ == "__main__":
    jobs = collect_job_data(pages_per_combo=1)
    build_job_heatmap(jobs)
Skill Demand Analysis
def analyze_demand(jobs):
    """Print a text report: demand per skill, top cities, and a skill-city matrix.

    Args:
        jobs: List of job dicts with at least "keyword" and "location" keys.
    """
    frame = pd.DataFrame(jobs)
    print("=== Job Market Analysis ===\n")

    # Listings per search keyword, most in-demand first.
    by_skill = frame.groupby("keyword").size().sort_values(ascending=False)
    print("Demand by Skill:")
    for name, total in by_skill.items():
        print(f" {name}: {total} listings")

    # Ten locations with the most listings.
    by_city = frame.groupby("location").size().sort_values(ascending=False)
    print("\nTop Cities:")
    for name, total in by_city.head(10).items():
        print(f" {name}: {total} listings")

    # Cross-tabulation of keyword x location listing counts.
    print("\nSkill-City Matrix:")
    crosstab = frame.groupby(["keyword", "location"]).size().unstack(fill_value=0)
    print(crosstab.to_string())
Scaling Up
For production job scraping, ScraperAPI handles Indeed's anti-bot measures seamlessly. ThorData residential proxies are essential for geo-targeted results. Track success rates with ScrapeOps.
Conclusion
A job market heatmap turns raw listings into strategic intelligence. Whether you're job hunting, hiring, or investing, this data shows you where opportunity is concentrated right now.
Top comments (0)