Introduction
Understanding which technical skills employers actually want, and how that demand shifts over time, is invaluable for career planning, hiring strategy, and curriculum design. Job postings are a goldmine of data on required technologies, experience levels, and compensation. In this tutorial, we'll build a scraper that tracks technical skill demand across job boards.
Setup
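The snippets in this tutorial share a handful of imports, collected below as a minimal setup sketch. requests is the only third-party dependency (pip install requests); everything else is in the standard library.

# Shared imports for the snippets below.
import json
import re
import time
from collections import Counter

import requests  # third-party: pip install requests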
Scraping Job Listings
Job boards like Indeed, LinkedIn, and Glassdoor list thousands of technical positions:
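A minimal sketch of a scrape_job_listings function with the shape the rest of the tutorial expects. The endpoint URL, response format, and field names here are placeholders, not a real board's API: every job board differs in markup, most enforce anti-bot measures, and you should check a site's terms of service and robots.txt before scraping.

from datetime import datetime, timezone

def scrape_job_listings(query, pages=1):
    """Fetch listings for a search query. Returns dicts with the
    fields the analysis and storage code below expects."""
    jobs = []
    headers = {"User-Agent": "skills-research-bot/1.0 (contact@example.com)"}
    for page in range(pages):
        # Placeholder endpoint; substitute the board you are targeting.
        url = f"https://jobs.example.com/api/search?q={query}&page={page}"
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
        # Parsing is board-specific; a JSON API response is assumed here.
        for item in resp.json().get("results", []):
            jobs.append({
                "title": item.get("title", ""),
                "company": item.get("company", ""),
                "salary": item.get("salary", ""),
                "location": item.get("location", ""),
                "description_preview": item.get("description", "")[:500],
                "scraped_at": datetime.now(timezone.utc).isoformat(),
            })
        time.sleep(2)  # be polite between page requests
    return jobs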
Extracting Technical Skills
The key is parsing unstructured job descriptions to identify specific technologies:
SKILL_PATTERNS = {
"languages": [
"python", "javascript", "typescript", "java", "go", "rust",
"c\\+\\+", "c#", "ruby", "php", "swift", "kotlin", "scala"
],
"frameworks": [
"react", "angular", "vue", "django", "flask", "fastapi",
"spring", "express", "next\.js", "rails", "laravel"
],
"databases": [
"postgresql", "mysql", "mongodb", "redis", "elasticsearch",
"dynamodb", "cassandra", "sqlite"
],
"cloud": [
"aws", "azure", "gcp", "docker", "kubernetes",
"terraform", "ansible", "jenkins", "github actions"
],
"ai_ml": [
"machine learning", "deep learning", "tensorflow",
"pytorch", "llm", "langchain", "openai", "hugging face",
"nlp", "computer vision"
]
}
def extract_skills(job_description):
"""Extract technical skills from job description text."""
found_skills = {}
text = job_description.lower()
for category, skills in SKILL_PATTERNS.items():
matched = []
for skill in skills:
            # \b fails next to non-word characters ("c++", "c#"), so use
            # lookarounds that require a non-word character or string edge
            pattern = r'(?<!\w)' + skill + r'(?!\w)'
if re.search(pattern, text):
matched.append(skill.replace('\\', ''))
if matched:
found_skills[category] = matched
return found_skills
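A quick sanity check on a sample snippet shows the output shape:

sample = "Senior engineer: Python, Django, PostgreSQL, and AWS required. C++ a plus."
print(extract_skills(sample))
# {'languages': ['python', 'c++'], 'frameworks': ['django'],
#  'databases': ['postgresql'], 'cloud': ['aws']}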
def extract_experience_level(text):
"""Determine required experience level."""
text = text.lower()
if any(w in text for w in ["senior", "sr.", "lead", "principal", "staff"]):
return "senior"
elif any(w in text for w in ["junior", "jr.", "entry level", "entry-level", "associate"]):
return "junior"
elif any(w in text for w in ["mid-level", "mid level", "intermediate"]):
return "mid"
return "unspecified"
Tracking Demand Over Time
def analyze_skill_demand(jobs):
"""Analyze which skills are most in demand."""
all_skills = Counter()
category_counts = {}
for job in jobs:
desc = job.get("description_preview", "") + " " + job.get("title", "")
skills = extract_skills(desc)
for category, skill_list in skills.items():
if category not in category_counts:
category_counts[category] = Counter()
for skill in skill_list:
all_skills[skill] += 1
category_counts[category][skill] += 1
return {
"total_jobs_analyzed": len(jobs),
"top_skills": all_skills.most_common(20),
"by_category": {
cat: counts.most_common(10)
for cat, counts in category_counts.items()
}
}
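The returned dict is plain JSON-serializable data, so each run can be written to a dated file and diffed later with compare_periods (shown next). A small helper, assuming you pick your own file naming scheme:

def save_snapshot(analysis, path):
    """Persist one period's analysis so separate runs can be compared."""
    with open(path, "w") as f:
        json.dump(analysis, f, indent=2)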
def compare_periods(current_data, previous_data):
"""Compare skill demand between two time periods."""
current_skills = dict(current_data["top_skills"])
previous_skills = dict(previous_data["top_skills"])
all_skills = set(current_skills.keys()) | set(previous_skills.keys())
trends = []
for skill in all_skills:
current = current_skills.get(skill, 0)
previous = previous_skills.get(skill, 0)
if previous > 0:
change_pct = ((current - previous) / previous) * 100
        else:
            # skill absent in the previous period; treat any appearance as +100%
            change_pct = 100 if current > 0 else 0
trends.append({
"skill": skill,
"current": current,
"previous": previous,
"change_pct": round(change_pct, 1)
})
return sorted(trends, key=lambda x: x["change_pct"], reverse=True)
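For example, with two saved snapshots loaded back from disk (the file names here are placeholders):

with open("skills_2024_01.json") as f:
    previous = json.load(f)
with open("skills_2024_02.json") as f:
    current = json.load(f)

trends = compare_periods(current, previous)
for t in trends[:5]:  # five fastest-growing skills
    print(f"{t['skill']}: {t['previous']} -> {t['current']} ({t['change_pct']:+}%)")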
Data Storage
import sqlite3
def store_job_data(jobs, db_path="job_skills.db"):
"""Store scraped jobs in SQLite for trend analysis."""
conn = sqlite3.connect(db_path)
conn.execute("""
CREATE TABLE IF NOT EXISTS jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT, company TEXT, salary TEXT,
skills TEXT, experience_level TEXT,
location TEXT, scraped_at TEXT
)
""")
for job in jobs:
skills = extract_skills(job.get("description_preview", ""))
level = extract_experience_level(job.get("title", ""))
conn.execute(
"INSERT INTO jobs (title, company, salary, skills, experience_level, location, scraped_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
(job["title"], job["company"], job["salary"],
json.dumps(skills), level, job["location"],
job["scraped_at"])
)
conn.commit()
conn.close()
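Once a few scrapes are stored, plain SQL gives quick cuts of the data, for example counting stored jobs per experience level:

conn = sqlite3.connect("job_skills.db")
for level, count in conn.execute(
    "SELECT experience_level, COUNT(*) FROM jobs GROUP BY experience_level"
):
    print(level, count)
conn.close()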
if __name__ == "__main__":
queries = ["python developer", "react developer", "devops engineer", "ml engineer"]
all_jobs = []
for q in queries:
jobs = scrape_job_listings(q, pages=3)
all_jobs.extend(jobs)
print(f"{q}: {len(jobs)} listings")
time.sleep(5)
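    # Persist this batch so later runs have a period to compare against.
    store_job_data(all_jobs)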
analysis = analyze_skill_demand(all_jobs)
print(json.dumps(analysis, indent=2))
Conclusion
Tracking technical skill demand through job posting analysis yields real market signals, not opinions or surveys. By scraping regularly and comparing periods, you can spot emerging and declining technologies before the shifts become obvious. Pair the pipeline with a proxy or scraping service to handle anti-bot measures on major job boards, and build your own skills intelligence dashboard.