Introduction
Understanding which technical skills employers actually want — and how demand shifts over time — is invaluable for career planning, hiring strategy, and education curriculum design. Job postings contain a goldmine of structured data about required technologies, experience levels, and compensation. In this tutorial, we'll build a scraper that tracks technical skill demand across job boards.
Setup
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import re
import time
from datetime import datetime
from collections import Counter
# Requests are routed through a scraping-proxy API to get past job-board
# anti-bot measures (IP blocking, CAPTCHAs, JS challenges). Replace the
# placeholder below with a real API key from your proxy provider account.
SCRAPER_API_KEY = "your_key_here"
BASE_URL = "http://api.scraperapi.com"
Scraping Job Listings
Job boards like Indeed and LinkedIn list thousands of technical positions (Stack Overflow Jobs, once a popular source, has since been discontinued):
def scrape_job_listings(query, location="remote", pages=5):
    """Scrape Indeed job listings through the ScraperAPI proxy.

    Args:
        query: Search keywords, e.g. "python developer".
        location: Location filter; defaults to "remote".
        pages: Number of result pages to fetch (Indeed shows 10 per page).

    Returns:
        List of dicts with title, company, salary, description preview,
        the originating query/location, and an ISO scrape timestamp.
        Pages that fail to download are skipped rather than aborting
        the whole run.
    """
    all_jobs = []
    for page in range(pages):
        # Indeed paginates with a 0-based offset in steps of 10.
        url = f"https://www.indeed.com/jobs?q={query}&l={location}&start={page * 10}"
        params = {
            "api_key": SCRAPER_API_KEY,
            "url": url,
            "render": "true"  # JS rendering: listing cards are injected client-side
        }
        try:
            # Timeout prevents one hung proxy request from stalling the run;
            # raise_for_status stops us from parsing an HTML error page.
            response = requests.get(BASE_URL, params=params, timeout=60)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Page {page} failed for {query!r}: {exc}")
            continue
        soup = BeautifulSoup(response.text, "html.parser")
        for card in soup.select(".job_seen_beacon"):
            title = card.select_one(".jobTitle")
            company = card.select_one(".companyName")
            salary = card.select_one(".salary-snippet-container")
            snippet = card.select_one(".job-snippet")
            if title:  # cards without a title are ads/placeholders
                all_jobs.append({
                    "title": title.text.strip(),
                    "company": company.text.strip() if company else "",
                    "salary": salary.text.strip() if salary else "",
                    "description_preview": snippet.text.strip() if snippet else "",
                    "query": query,
                    "location": location,
                    "scraped_at": datetime.now().isoformat()
                })
        time.sleep(3)  # Respect rate limits
    return all_jobs
Extracting Technical Skills
The key is parsing unstructured job descriptions to identify specific technologies:
# Regex fragments (lowercase) for the skills we track, grouped by category.
# Entries containing regex metacharacters ("+", ".") are pre-escaped;
# extract_skills() strips the backslashes again for display.
SKILL_PATTERNS = {
    "languages": [
        "python", "javascript", "typescript", "java", "go", "rust",
        "c\\+\\+", "c#", "ruby", "php", "swift", "kotlin", "scala"
    ],
    "frameworks": [
        "react", "angular", "vue", "django", "flask", "fastapi",
        # Raw string: "\." in a plain string is an invalid escape sequence.
        "spring", "express", r"next\.js", "rails", "laravel"
    ],
    "databases": [
        "postgresql", "mysql", "mongodb", "redis", "elasticsearch",
        "dynamodb", "cassandra", "sqlite"
    ],
    "cloud": [
        "aws", "azure", "gcp", "docker", "kubernetes",
        "terraform", "ansible", "jenkins", "github actions"
    ],
    "ai_ml": [
        "machine learning", "deep learning", "tensorflow",
        "pytorch", "llm", "langchain", "openai", "hugging face",
        "nlp", "computer vision"
    ]
}
def extract_skills(job_description):
    """Extract technical skills from job description text.

    Returns a dict mapping category name -> list of matched skills in
    display form (escape backslashes removed). Categories with no match
    are omitted, so an empty dict means nothing was found.
    """
    found_skills = {}
    text = job_description.lower()
    for category, skills in SKILL_PATTERNS.items():
        matched = []
        for skill in skills:
            # A leading \b is safe (every skill starts with a word char),
            # but a trailing \b never matches after "+" or "#": there is no
            # word boundary between "c++"/"c#" and a following space, so
            # the old pattern r'\bc\+\+\b' missed C++ entirely. A negative
            # lookahead gives the same "not glued to a word" guarantee.
            pattern = r'\b' + skill + r'(?!\w)'
            if re.search(pattern, text):
                matched.append(skill.replace('\\', ''))
        if matched:
            found_skills[category] = matched
    return found_skills
def extract_experience_level(text):
    """Classify the seniority a posting asks for.

    Scans the text for level keywords and returns one of "senior",
    "junior", "mid", or "unspecified". Senior markers take precedence
    over junior ones, then mid-level, matching how mixed titles like
    "Senior/Jr" are normally read.
    """
    lowered = text.lower()
    level_markers = (
        ("senior", ("senior", "sr.", "lead", "principal", "staff")),
        ("junior", ("junior", "jr.", "entry level", "entry-level", "associate")),
        ("mid", ("mid-level", "mid level", "intermediate")),
    )
    for level, markers in level_markers:
        if any(marker in lowered for marker in markers):
            return level
    return "unspecified"
Tracking Demand Over Time
def analyze_skill_demand(jobs):
    """Summarize skill frequency across a batch of scraped jobs.

    Skills are extracted from each posting's description preview plus
    its title. Returns the total number of postings analyzed, the top
    20 skills overall, and the top 10 per category.
    """
    overall = Counter()
    per_category = {}
    for posting in jobs:
        text = posting.get("description_preview", "") + " " + posting.get("title", "")
        for category, matched in extract_skills(text).items():
            bucket = per_category.setdefault(category, Counter())
            for name in matched:
                overall[name] += 1
                bucket[name] += 1
    return {
        "total_jobs_analyzed": len(jobs),
        "top_skills": overall.most_common(20),
        "by_category": {
            category: bucket.most_common(10)
            for category, bucket in per_category.items()
        }
    }
def compare_periods(current_data, previous_data):
    """Rank skills by demand change between two analysis snapshots.

    Both arguments are analyze_skill_demand() outputs. Returns a list of
    {skill, current, previous, change_pct} dicts sorted by change_pct
    descending. A skill newly appearing is reported as +100; one absent
    in both periods scores 0.
    """
    now = dict(current_data["top_skills"])
    before = dict(previous_data["top_skills"])
    trends = []
    for name in set(now) | set(before):
        cur = now.get(name, 0)
        prev = before.get(name, 0)
        if prev > 0:
            pct = (cur - prev) / prev * 100
        else:
            pct = 100 if cur > 0 else 0
        trends.append({
            "skill": name,
            "current": cur,
            "previous": prev,
            "change_pct": round(pct, 1)
        })
    trends.sort(key=lambda entry: entry["change_pct"], reverse=True)
    return trends
Data Storage
import sqlite3
def store_job_data(jobs, db_path="job_skills.db"):
    """Persist scraped jobs to SQLite for later trend analysis.

    Creates the `jobs` table on first use. Skills are stored as a JSON
    blob (category -> matched skills); the experience level is derived
    from the job title. The connection is always closed, even if an
    insert raises (the original leaked the handle on error).
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS jobs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT, company TEXT, salary TEXT,
                skills TEXT, experience_level TEXT,
                location TEXT, scraped_at TEXT
            )
        """)
        rows = []
        for job in jobs:
            skills = extract_skills(job.get("description_preview", ""))
            level = extract_experience_level(job.get("title", ""))
            rows.append((job["title"], job["company"], job["salary"],
                         json.dumps(skills), level, job["location"],
                         job["scraped_at"]))
        # Batch insert: one executemany instead of one execute per row.
        conn.executemany(
            "INSERT INTO jobs (title, company, salary, skills, experience_level, location, scraped_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
            rows
        )
        conn.commit()
    finally:
        conn.close()
if __name__ == "__main__":
    # Collect listings for a handful of representative tech roles, then
    # print an aggregate skill-demand report as JSON.
    SEARCH_QUERIES = ["python developer", "react developer", "devops engineer", "ml engineer"]
    collected = []
    for query in SEARCH_QUERIES:
        listings = scrape_job_listings(query, pages=3)
        collected.extend(listings)
        print(f"{query}: {len(listings)} listings")
        time.sleep(5)  # pause between query batches to stay polite
    report = analyze_skill_demand(collected)
    print(json.dumps(report, indent=2))
Conclusion
Tracking technical skill demand through job posting analysis reveals real market signals — not opinions or surveys. By scraping regularly and comparing periods, you can spot emerging technologies and declining ones before they become obvious. Use ScraperAPI to handle the anti-bot measures on major job boards, and build your own skills intelligence dashboard.
Top comments (0)