Job postings reveal where the economy is heading: which skills are in demand, which cities are hiring, and which industries are growing. In this tutorial, we'll scrape job listings and build an interactive heatmap.
What We'll Build
- Multi-site job listing scraper
- Geographic and skill-based analysis
- Interactive heatmap visualization
- Trend tracking over time
Setup
pip install requests beautifulsoup4 pandas folium geopy
Job Listing Scraper
import time
from datetime import datetime
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
# ScraperAPI key used to proxy requests past Indeed's anti-bot measures.
# Replace with a real key before running.
SCRAPER_API_KEY = "YOUR_KEY"
def scrape_job_listings(keyword, location="", page=1):
    """Scrape one page of Indeed job listings via ScraperAPI.

    Args:
        keyword: Search term, e.g. "python developer".
        location: City/state filter, e.g. "Austin, TX". Empty means anywhere.
        page: Zero-based results page; Indeed paginates in steps of 10.

    Returns:
        A list of dicts with "title", "company", "location", "salary",
        "keyword", and "scraped_at" keys. Cards with no title are skipped;
        missing company/location fall back to "N/A", missing salary to
        "Not listed".

    Raises:
        requests.HTTPError: If the proxy request returns an error status.
    """
    # Percent-encode the query pieces: keywords and locations contain
    # spaces and commas that would otherwise malform the URL.
    url = (
        f"https://www.indeed.com/jobs"
        f"?q={quote_plus(keyword)}&l={quote_plus(location)}&start={page * 10}"
    )
    # The target URL itself must be encoded too; if it is passed raw, its
    # '&' separators are parsed as extra ScraperAPI parameters and the
    # l/start arguments are silently dropped.
    api_url = (
        f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}"
        f"&url={quote_plus(url)}"
    )
    response = requests.get(api_url, timeout=60)
    # Fail loudly instead of parsing an error/captcha page as if it were results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    jobs = []
    for card in soup.select(".job_seen_beacon"):
        title = card.select_one(".jobTitle")
        company = card.select_one("[data-testid='company-name']")
        loc = card.select_one("[data-testid='text-location']")
        salary = card.select_one(".salary-snippet-container")
        if title:
            jobs.append({
                "title": title.text.strip(),
                "company": company.text.strip() if company else "N/A",
                "location": loc.text.strip() if loc else "N/A",
                "salary": salary.text.strip() if salary else "Not listed",
                "keyword": keyword,
                "scraped_at": datetime.now().isoformat()
            })
    return jobs
Multi-City Collection
# Metro areas surveyed; strings match the format of Indeed's "l" parameter.
CITIES = [
    "New York, NY", "San Francisco, CA", "Austin, TX",
    "Seattle, WA", "Chicago, IL", "Boston, MA",
    "Denver, CO", "Atlanta, GA", "Miami, FL",
    "Los Angeles, CA", "Portland, OR", "Nashville, TN"
]
# Search keywords; each one is scraped in every city above.
SKILLS = ["python developer", "react developer", "data engineer",
          "devops engineer", "machine learning engineer"]
def collect_job_data(pages_per_combo=2):
    """Scrape every skill/city combination and pool the results.

    Args:
        pages_per_combo: How many result pages to fetch per skill/city pair.

    Returns:
        A flat list of job dicts from all searches. Sleeps 5 seconds
        between requests to avoid hammering the proxy.
    """
    collected = []
    for search_term in SKILLS:
        for metro in CITIES:
            for page_index in range(pages_per_combo):
                print(f"Scraping: {search_term} in {metro} (page {page_index + 1})")
                batch = scrape_job_listings(search_term, metro, page_index)
                collected.extend(batch)
                time.sleep(5)
    return collected
Geocoding Locations
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

# Shared Nominatim client; a descriptive user_agent is required by the
# Nominatim usage policy.
geolocator = Nominatim(user_agent="job_heatmap")
# location string -> (lat, lon) memo, so duplicate locations are only
# geocoded once per run.
geo_cache = {}
def geocode_location(location):
    """Convert a location string to (lat, lon), or None if unresolvable.

    Results -- including failed lookups -- are memoized in geo_cache so a
    repeated location string never triggers a second network request.
    Timeouts are deliberately NOT cached, so a flaky lookup can be
    retried on a later call.
    """
    if location in geo_cache:
        return geo_cache[location]
    try:
        result = geolocator.geocode(location, timeout=10)
    except GeocoderTimedOut:
        # Transient failure: return None without caching so we can retry.
        return None
    # Cache negative results too; otherwise unresolvable strings (e.g.
    # "N/A" or "Remote") would be re-queried on every single call.
    coords = (result.latitude, result.longitude) if result else None
    geo_cache[location] = coords
    return coords
Building the Heatmap
import folium
from folium.plugins import HeatMap
import pandas as pd
def build_job_heatmap(jobs, output="job_heatmap.html"):
    """Build an interactive heatmap of job density and save it as HTML.

    Args:
        jobs: List of job dicts as produced by scrape_job_listings.
        output: Path of the HTML file to write.

    Returns:
        None. Saves a folium map to `output` (or returns early if there
        is nothing to plot).
    """
    if not jobs:
        # A DataFrame built from an empty list has no "location" column,
        # so groupby would raise KeyError -- bail out early instead.
        print("No jobs to plot.")
        return
    df = pd.DataFrame(jobs)
    location_counts = df.groupby("location").size().reset_index(name="count")

    heat_data = []
    for _, row in location_counts.iterrows():
        # "N/A" is the scraper's placeholder for a card with no location
        # element; geocoding it would waste a request or plot a bogus point.
        if row["location"] == "N/A":
            continue
        coords = geocode_location(row["location"])
        if coords:
            heat_data.append([coords[0], coords[1], row["count"]])

    # Center the map on the continental US.
    m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
    HeatMap(
        heat_data,
        min_opacity=0.3,
        radius=25,
        blur=15,
        gradient={0.4: "blue", 0.65: "lime", 1: "red"}
    ).add_to(m)
    # Add clickable markers for the ten busiest locations.
    for point in sorted(heat_data, key=lambda x: x[2], reverse=True)[:10]:
        folium.CircleMarker(
            location=[point[0], point[1]],
            radius=point[2] / 5,
            popup=f"Jobs: {point[2]}",
            color="red",
            fill=True
        ).add_to(m)
    m.save(output)
    print(f"Heatmap saved to {output}")
# Run the full pipeline only when executed as a script, so importing this
# module for its functions does not kick off hours of scraping.
if __name__ == "__main__":
    jobs = collect_job_data(pages_per_combo=1)
    build_job_heatmap(jobs)
Skill Demand Analysis
def analyze_demand(jobs):
    """Print a text report: demand per skill, top cities, and a skill-city matrix.

    Args:
        jobs: List of job dicts with at least "keyword" and "location" keys.
    """
    frame = pd.DataFrame(jobs)
    print("=== Job Market Analysis ===\n")

    # Listings per search keyword, most in-demand first.
    by_skill = frame.groupby("keyword").size().sort_values(ascending=False)
    print("Demand by Skill:")
    for name, total in by_skill.items():
        print(f" {name}: {total} listings")

    # Ten locations with the most listings.
    by_city = frame.groupby("location").size().sort_values(ascending=False)
    print("\nTop Cities:")
    for name, total in by_city.head(10).items():
        print(f" {name}: {total} listings")

    # Cross-tabulation of keyword x location listing counts.
    print("\nSkill-City Matrix:")
    crosstab = frame.groupby(["keyword", "location"]).size().unstack(fill_value=0)
    print(crosstab.to_string())
Scaling Up
For production job scraping, ScraperAPI handles Indeed's anti-bot measures seamlessly. ThorData residential proxies are essential for geo-targeted results. Track success rates with ScrapeOps.
Conclusion
A job market heatmap turns raw listings into strategic intelligence. Whether you're job hunting, hiring, or investing, this data shows you where opportunity is concentrated right now.
Top comments (0)