LinkedIn is one of the richest sources of job market data — millions of listings updated daily across every industry and geography. Whether you're building an HR tech tool, researching salary trends, or aggregating job boards, scraping LinkedIn job listings is a high-value skill.
The good news? LinkedIn exposes a public guest API for job listings that requires no login or authentication. In this guide, I'll show you exactly how to use it with Python.
Why Scrape LinkedIn Jobs?
- Market research: Track hiring trends by industry, location, or company size
- Competitive intelligence: Monitor what roles your competitors are filling
- Job board aggregation: Feed listings into your own platform
- Salary analysis: Combine with other data sources for compensation benchmarking
- Academic research: Study labor market dynamics at scale
The LinkedIn Jobs Guest API
LinkedIn serves job listings to non-logged-in visitors through a public-facing endpoint. This is the same data you see when you Google "software engineer jobs LinkedIn" and click through without signing in.
The base endpoint:
https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search
Key Parameters
| Parameter | Description | Example |
|---|---|---|
| `keywords` | Job title or search terms | `python developer` |
| `location` | City, state, or country | `San Francisco, CA` |
| `geoId` | LinkedIn geo identifier | `103644278` (US) |
| `f_TPR` | Time posted filter | `r86400` (past 24h) |
| `f_E` | Experience level | `2` (entry), `3` (associate) |
| `start` | Pagination offset | `0`, `25`, `50`... |
| `f_C` | Company ID filter | `1441` (Google) |
Basic Scraper: Fetching Job Listings
Let's build a practical scraper step by step.
import requests
from bs4 import BeautifulSoup
import time
import json
def scrape_linkedin_jobs(keywords, location, num_pages=3):
    """Scrape LinkedIn guest job listings for a keyword/location search.

    Args:
        keywords: Job title or search terms, e.g. "python developer".
        location: City, state, or country string.
        num_pages: Number of 25-result pages to fetch.

    Returns:
        A list of dicts with keys: title, company, location, url, posted.
    """
    base_url = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"
    # Hoisted out of the loop: the headers never change between pages.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }
    all_jobs = []
    for page in range(num_pages):
        params = {
            "keywords": keywords,
            "location": location,
            "start": page * 25,  # LinkedIn paginates in steps of 25
            "f_TPR": "r604800",  # Past week
        }
        # timeout keeps a stalled connection from hanging the scraper forever
        response = requests.get(base_url, params=params, headers=headers, timeout=15)
        if response.status_code != 200:
            print(f"Page {page}: HTTP {response.status_code}")
            break
        soup = BeautifulSoup(response.text, "html.parser")
        job_cards = soup.find_all("div", class_="base-card")
        for card in job_cards:
            title_el = card.find("h3", class_="base-search-card__title")
            company_el = card.find("h4", class_="base-search-card__subtitle")
            location_el = card.find("span", class_="job-search-card__location")
            link_el = card.find("a", class_="base-card__full-link")
            date_el = card.find("time")
            all_jobs.append({
                "title": title_el.text.strip() if title_el else None,
                "company": company_el.text.strip() if company_el else None,
                "location": location_el.text.strip() if location_el else None,
                # Drop the tracking query string from the job URL
                "url": link_el["href"].split("?")[0] if link_el else None,
                "posted": date_el["datetime"] if date_el else None,
            })
        print(f"Page {page + 1}: Found {len(job_cards)} jobs")
        time.sleep(2)  # Be respectful with rate limiting
    return all_jobs
# Usage: pull five pages of data-engineer listings in New York and persist them.
jobs = scrape_linkedin_jobs("data engineer", "New York, NY", num_pages=5)
print(f"\nTotal jobs found: {len(jobs)}")
with open("linkedin_jobs.json", "w") as out_file:
    json.dump(jobs, out_file, indent=2)
Extracting Job Details
Each listing has a detail page you can fetch for full descriptions:
def get_job_details(job_url):
    """Fetch a single job's detail page and extract its description and criteria.

    Args:
        job_url: Canonical URL of a LinkedIn job posting.

    Returns:
        Dict with "description" (plain text, or None if not found) and
        "criteria" (dict mapping criterion headers to their values).
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }
    # timeout keeps one stalled detail-page request from blocking the whole run
    response = requests.get(job_url, headers=headers, timeout=15)
    soup = BeautifulSoup(response.text, "html.parser")
    description = soup.find("div", class_="show-more-less-html__markup")
    criteria = soup.find_all("li", class_="description__job-criteria-item")
    details = {
        "description": description.get_text(strip=True) if description else None,
        "criteria": {},
    }
    for item in criteria:
        header = item.find("h3")
        value = item.find("span")
        if header and value:
            details["criteria"][header.text.strip()] = value.text.strip()
    return details
Advanced Filtering
LinkedIn's guest API supports powerful filtering. Here's how to combine filters:
def search_with_filters(
    keywords,
    location,
    experience_level=None,
    job_type=None,
    posted_within=None,
    company_id=None,
    remote=False
):
    """Build a LinkedIn guest-API params dict from high-level filter options.

    Filter codes:
        experience_level: 1=Internship, 2=Entry, 3=Associate,
                          4=Mid-Senior, 5=Director, 6=Executive
        job_type: F=Full-time, P=Part-time, C=Contract, T=Temporary, I=Internship
        posted_within: r86400=24h, r604800=week, r2592000=month
    """
    params = {"keywords": keywords, "location": location}
    # Map each optional filter onto its LinkedIn query-string key,
    # skipping anything the caller left unset.
    optional_filters = {
        "f_E": experience_level,
        "f_JT": job_type,
        "f_TPR": posted_within,
        "f_C": company_id,
    }
    params.update({key: value for key, value in optional_filters.items() if value})
    if remote:
        params["f_WT"] = "2"
    return params
# Example: Remote senior Python jobs posted in last 24 hours
filters = search_with_filters(
    keywords="python developer",
    location="United States",
    experience_level="4",    # Mid-Senior
    job_type="F",            # Full-time
    posted_within="r86400",  # past 24 hours
    remote=True,
)
Handling Pagination at Scale
When scraping thousands of listings, you need robust pagination:
def paginate_all_results(keywords, location, max_results=500):
    """Page through LinkedIn guest search results until max_results or exhaustion.

    Retries on HTTP 429 (rate limit) after a 60-second wait, and stops after
    three consecutive empty pages, which signals the end of the result set.

    Args:
        keywords: Search terms.
        location: Location string.
        max_results: Upper bound on the number of jobs returned.

    Returns:
        List of job dicts (title, company, location, url),
        at most max_results long.
    """
    base_url = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"
    headers = {"User-Agent": "Mozilla/5.0 (compatible; JobBot/1.0)"}
    all_jobs = []
    start = 0
    consecutive_empty = 0
    while len(all_jobs) < max_results:
        params = {
            "keywords": keywords,
            "location": location,
            "start": start,
        }
        response = requests.get(base_url, params=params, headers=headers, timeout=15)
        if response.status_code == 429:
            print("Rate limited. Waiting 60 seconds...")
            time.sleep(60)
            continue  # retry the same page
        if response.status_code != 200:
            # Bug fix: previously any non-429 error (403, 999, ...) fell through
            # and its error page was parsed as if it held job cards. A hard
            # error should end the crawl instead.
            print(f"HTTP {response.status_code} at start={start}; stopping.")
            break
        soup = BeautifulSoup(response.text, "html.parser")
        cards = soup.find_all("div", class_="base-card")
        if not cards:
            consecutive_empty += 1
            if consecutive_empty >= 3:
                break  # three empty pages in a row: results exhausted
            time.sleep(5)
            start += 25
            continue
        consecutive_empty = 0
        for card in cards:
            title = card.find("h3", class_="base-search-card__title")
            company = card.find("h4", class_="base-search-card__subtitle")
            loc = card.find("span", class_="job-search-card__location")
            link = card.find("a", class_="base-card__full-link")
            all_jobs.append({
                "title": title.text.strip() if title else "",
                "company": company.text.strip() if company else "",
                "location": loc.text.strip() if loc else "",
                "url": link["href"].split("?")[0] if link else "",
            })
        start += 25
        time.sleep(2)
    # Bug fix: a full final page could overshoot max_results; trim the excess
    # so the documented bound actually holds.
    return all_jobs[:max_results]
Dealing with Rate Limits and Blocks
LinkedIn will throttle or block you if you hit their servers too aggressively. Here are practical strategies:
1. Rotate User Agents
import random

# A small pool of desktop browser user agents to rotate between requests.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
]

# Pick a fresh agent for each request so traffic looks less uniform.
headers = {"User-Agent": random.choice(USER_AGENTS)}
2. Use a Proxy Aggregator
For production workloads, a proxy rotation service is essential. ScrapeOps is a proxy aggregator that routes your requests through multiple proxy providers, automatically finding the cheapest working proxy for each request:
def scrape_with_proxy(url, params):
    """Route a request through the ScrapeOps proxy aggregator.

    The target URL and its query parameters are combined into one fully
    qualified URL using requests' prepared-request machinery, then handed
    to the proxy endpoint.

    Args:
        url: Target URL to scrape.
        params: Query parameters for the target URL.

    Returns:
        The proxy's requests.Response object.
    """
    proxy_params = {
        "api_key": "YOUR_SCRAPEOPS_KEY",
        # Encode url + params into a single URL string for the proxy
        "url": requests.Request("GET", url, params=params).prepare().url,
    }
    # Proxied requests can be slow; use a generous timeout rather than hang forever
    response = requests.get(
        "https://proxy.scrapeops.io/v1/",
        params=proxy_params,
        timeout=60,
    )
    return response
3. Managed Scraping API
If you'd rather skip the infrastructure altogether, ScraperAPI handles proxy rotation, CAPTCHAs, and retries for you:
def scrape_with_scraperapi(url):
    """Fetch a URL through ScraperAPI's managed scraping endpoint."""
    payload = {"api_key": "YOUR_SCRAPERAPI_KEY", "url": url}
    return requests.get("https://api.scraperapi.com", params=payload)
Saving Results to CSV
import csv
def save_to_csv(jobs, filename="linkedin_jobs.csv"):
    """Write a list of job dicts to a CSV file.

    Column order comes from the keys of the first job dict; all jobs are
    assumed to share the same keys. Does nothing for an empty list.

    Args:
        jobs: List of flat dicts, one per job.
        filename: Destination CSV path.
    """
    if not jobs:
        return
    fieldnames = jobs[0].keys()
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(jobs)
    # Bug fix: the message previously printed a literal placeholder
    # instead of interpolating the destination filename.
    print(f"Saved {len(jobs)} jobs to {filename}")
Legal and Ethical Considerations
- Public data only: This guide covers publicly accessible job listings — the same data visible to any Google searcher without a LinkedIn account
- Respect robots.txt: Check LinkedIn's robots.txt and honor crawl-delay directives
- Rate limit yourself: Add delays between requests (2-5 seconds minimum)
- Don't scrape profiles: Personal profile data has different legal implications than public job posts
- Check LinkedIn's ToS: Terms change — review them periodically
- GDPR considerations: If you're storing data about EU individuals, ensure compliance
Complete Working Example
Here's the full script you can run right now:
import requests
from bs4 import BeautifulSoup
import json
import time
def scrape_linkedin_jobs(keywords, location, num_pages=5):
    """Scrape up to num_pages * 25 public LinkedIn job listings.

    Args:
        keywords: Search terms, e.g. "software engineer".
        location: Location string, e.g. "San Francisco, CA".
        num_pages: Number of 25-result pages to request.

    Returns:
        List of dicts with title, company, location, and url keys.
    """
    base_url = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"
    # Identical for every page, so build the headers once up front.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )
    }
    all_jobs = []
    for page in range(num_pages):
        params = {
            "keywords": keywords,
            "location": location,
            "start": page * 25,   # 25 results per page
            "f_TPR": "r604800",   # posted within the past week
        }
        # timeout keeps a dead connection from hanging the script
        response = requests.get(base_url, params=params, headers=headers, timeout=15)
        if response.status_code != 200:
            break
        soup = BeautifulSoup(response.text, "html.parser")
        for card in soup.find_all("div", class_="base-card"):
            title = card.find("h3", class_="base-search-card__title")
            company = card.find("h4", class_="base-search-card__subtitle")
            loc = card.find("span", class_="job-search-card__location")
            link = card.find("a", class_="base-card__full-link")
            all_jobs.append({
                "title": title.text.strip() if title else "",
                "company": company.text.strip() if company else "",
                "location": loc.text.strip() if loc else "",
                "url": link["href"].split("?")[0] if link else "",
            })
        time.sleep(2)  # polite delay between pages
    return all_jobs
if __name__ == "__main__":
    results = scrape_linkedin_jobs("software engineer", "San Francisco, CA")
    with open("linkedin_jobs.json", "w") as out:
        json.dump(results, out, indent=2)
    print(f"Scraped {len(results)} jobs")
Wrapping Up
LinkedIn's public jobs API is a goldmine for job market data. The key principles:
- Use the guest API — no login needed for public job listings
- Paginate with the `start` parameter — increment by 25
- Filter aggressively — use `f_TPR`, `f_E`, `f_JT` to narrow results
- Respect rate limits — add delays, rotate user agents
- Use proxies for scale — ScrapeOps for proxy aggregation or ScraperAPI for fully managed scraping
Happy scraping!
Top comments (0)