DEV Community

agenthustler
agenthustler

Posted on

How to Build a Job Market Heatmap with Web Scraping

Job postings reveal where the economy is heading — which skills are in demand, which cities are hiring, and which industries are growing. In this tutorial, we'll scrape job listings and build an interactive heatmap.

What We'll Build

  • Multi-site job listing scraper
  • Geographic and skill-based analysis
  • Interactive heatmap visualization
  • Trend tracking over time

Setup

pip install requests beautifulsoup4 pandas folium geopy
Enter fullscreen mode Exit fullscreen mode

Job Listing Scraper

import time
from datetime import datetime
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup

# ScraperAPI credential; replace with your own key from scraperapi.com.
SCRAPER_API_KEY = "YOUR_KEY"

def scrape_job_listings(keyword, location="", page=1):
    """Scrape one page of job listings from Indeed via ScraperAPI.

    Args:
        keyword: Search term, e.g. "python developer".
        location: City/state filter, e.g. "Austin, TX" (empty = anywhere).
        page: Zero-based results page; Indeed paginates 10 listings per page.

    Returns:
        A list of dicts with title/company/location/salary/keyword/scraped_at
        keys, one per parsed job card.

    Raises:
        requests.HTTPError: if ScraperAPI returns an error status.
    """
    # Percent-encode the query values; raw spaces/commas in keyword or
    # location would otherwise corrupt the query string.
    target = (
        "https://www.indeed.com/jobs?"
        + urlencode({"q": keyword, "l": location, "start": page * 10})
    )
    # The target URL must itself be encoded when nested as a parameter of the
    # proxy URL, otherwise its "&"-separated params are interpreted as
    # ScraperAPI parameters instead of Indeed parameters.
    api_url = (
        "http://api.scraperapi.com?"
        + urlencode({"api_key": SCRAPER_API_KEY, "url": target})
    )

    response = requests.get(api_url, timeout=60)
    # Fail loudly on blocked pages / expired keys instead of silently
    # parsing an error page into zero jobs.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    jobs = []
    for card in soup.select(".job_seen_beacon"):
        title = card.select_one(".jobTitle")
        company = card.select_one("[data-testid='company-name']")
        loc = card.select_one("[data-testid='text-location']")
        salary = card.select_one(".salary-snippet-container")

        # Skip cards without a title (ads or malformed markup).
        if title:
            jobs.append({
                "title": title.text.strip(),
                "company": company.text.strip() if company else "N/A",
                "location": loc.text.strip() if loc else "N/A",
                "salary": salary.text.strip() if salary else "Not listed",
                "keyword": keyword,
                "scraped_at": datetime.now().isoformat()
            })

    return jobs
Enter fullscreen mode Exit fullscreen mode

Multi-City Collection

# Major US tech hiring markets to sample; "City, ST" format matches Indeed's
# location filter and geocodes cleanly later on.
CITIES = [
    "New York, NY", "San Francisco, CA", "Austin, TX",
    "Seattle, WA", "Chicago, IL", "Boston, MA",
    "Denver, CO", "Atlanta, GA", "Miami, FL",
    "Los Angeles, CA", "Portland, OR", "Nashville, TN"
]

# Search keywords — each becomes the "keyword" field on scraped jobs.
SKILLS = ["python developer", "react developer", "data engineer",
          "devops engineer", "machine learning engineer"]

def collect_job_data(pages_per_combo=2):
    """Scrape every skill/city combination and pool the results.

    Args:
        pages_per_combo: Number of result pages to fetch per (skill, city)
            pair (each page holds up to 10 listings).

    Returns:
        A flat list of job dicts from all combinations. Failed requests are
        reported and skipped so one bad page doesn't abort the whole crawl.
    """
    all_jobs = []

    for skill in SKILLS:
        for city in CITIES:
            for page in range(pages_per_combo):
                print(f"Scraping: {skill} in {city} (page {page + 1})")
                try:
                    jobs = scrape_job_listings(skill, city, page)
                except requests.RequestException as exc:
                    # A single blocked or timed-out page shouldn't kill a
                    # crawl that takes many minutes; report and move on.
                    print(f"  Failed: {exc}")
                    continue
                all_jobs.extend(jobs)
                time.sleep(5)  # throttle to stay polite / under rate limits

    return all_jobs
Enter fullscreen mode Exit fullscreen mode

Geocoding Locations

from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

# Nominatim is OpenStreetMap's free geocoder; its usage policy requires a
# distinctive user_agent identifying the application.
geolocator = Nominatim(user_agent="job_heatmap")
# In-memory cache so each distinct location string is geocoded at most once.
geo_cache = {}

def geocode_location(location):
    """Convert a location string to a (lat, lon) tuple, or None on failure.

    Results — including failures — are memoized in ``geo_cache`` so repeated
    location strings never trigger duplicate Nominatim requests, which
    matters because Nominatim rate-limits aggressively.

    Args:
        location: Free-form location string, e.g. "Austin, TX".

    Returns:
        (latitude, longitude) tuple, or None if the lookup failed/timed out.
    """
    if location in geo_cache:
        return geo_cache[location]

    coords = None
    try:
        result = geolocator.geocode(location, timeout=10)
        if result:
            coords = (result.latitude, result.longitude)
    except GeocoderTimedOut:
        # Treat a timeout the same as "not found" rather than crashing.
        pass

    # Cache failures too, so unmappable strings like "Remote" or "N/A"
    # aren't re-queried on every call.
    geo_cache[location] = coords
    return coords
Enter fullscreen mode Exit fullscreen mode

Building the Heatmap

import folium
from folium.plugins import HeatMap
import pandas as pd

def build_job_heatmap(jobs, output="job_heatmap.html"):
    """Build an interactive job-density heatmap and save it as an HTML file.

    Args:
        jobs: List of job dicts as produced by ``scrape_job_listings``; each
            must carry a "location" key.
        output: Path of the HTML file to write.
    """
    if not jobs:
        # An empty DataFrame has no "location" column, so groupby below
        # would raise KeyError; bail out early instead.
        print("No jobs to plot.")
        return

    df = pd.DataFrame(jobs)

    # Count listings per distinct location string.
    location_counts = df.groupby("location").size().reset_index(name="count")

    # Resolve each location to coordinates, weighting by listing count; skip
    # anything that can't be geocoded (e.g. "Remote" or "N/A").
    heat_data = []
    for _, row in location_counts.iterrows():
        coords = geocode_location(row["location"])
        if coords:
            heat_data.append([coords[0], coords[1], row["count"]])

    # Center the map roughly on the geographic center of the contiguous US.
    m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

    HeatMap(
        heat_data,
        min_opacity=0.3,
        radius=25,
        blur=15,
        gradient={0.4: "blue", 0.65: "lime", 1: "red"}
    ).add_to(m)

    # Highlight the ten busiest markets with clickable markers sized by count.
    for point in sorted(heat_data, key=lambda x: x[2], reverse=True)[:10]:
        folium.CircleMarker(
            location=[point[0], point[1]],
            radius=point[2] / 5,
            popup=f"Jobs: {point[2]}",
            color="red",
            fill=True
        ).add_to(m)

    m.save(output)
    print(f"Heatmap saved to {output}")

# Run the full pipeline only when executed as a script, so importing this
# module (e.g. to reuse the scraper functions) doesn't kick off a crawl.
if __name__ == "__main__":
    jobs = collect_job_data(pages_per_combo=1)
    build_job_heatmap(jobs)
Enter fullscreen mode Exit fullscreen mode

Skill Demand Analysis

def analyze_demand(jobs):
    """Print a text summary of demand by skill, by city, and as a matrix.

    Args:
        jobs: List of job dicts with at least "keyword" and "location" keys.
    """
    if not jobs:
        # groupby("keyword") on an empty frame raises KeyError; guard it.
        print("No job data to analyze.")
        return

    df = pd.DataFrame(jobs)

    print("=== Job Market Analysis ===\n")

    # Which skills have the most openings overall?
    skill_counts = df.groupby("keyword").size().sort_values(ascending=False)
    print("Demand by Skill:")
    for skill, count in skill_counts.items():
        print(f"  {skill}: {count} listings")

    # Which metros are hiring the most?
    city_counts = df.groupby("location").size().sort_values(ascending=False)
    print("\nTop Cities:")
    for city, count in city_counts.head(10).items():
        print(f"  {city}: {count} listings")

    # Cross-tabulation: listing count for every (skill, city) pair.
    print("\nSkill-City Matrix:")
    matrix = df.groupby(["keyword", "location"]).size().unstack(fill_value=0)
    print(matrix.to_string())
Enter fullscreen mode Exit fullscreen mode

Scaling Up

For production job scraping, ScraperAPI handles Indeed's anti-bot measures seamlessly. ThorData residential proxies are essential for geo-targeted results. Track success rates with ScrapeOps.

Conclusion

A job market heatmap turns raw listings into strategic intelligence. Whether you're job hunting, hiring, or investing, this data shows you where opportunity is concentrated right now.

Top comments (0)