Upwork processes over $3.8 billion in freelancer earnings annually. Whether you're analyzing market rates, building a talent aggregator, studying demand trends by skill, or researching what clients are willing to pay — Upwork's data is incredibly valuable for anyone in the freelance economy.
In this guide, I'll walk you through scraping Upwork job listings and public freelancer profiles using Python, including strategies for dealing with their strict anti-scraping defenses.
Why Scrape Upwork?
- Market rate analysis: What are clients paying for React developers in 2026? For AI/ML engineers? Know the going rates before you set yours.
- Demand research: Which skills are trending? What's getting posted most frequently?
- Lead generation: Find active clients posting jobs in your niche
- Talent sourcing: Agencies can find freelancers with specific skill combinations
- Academic research: Labor market studies, gig economy analysis
The Challenge: Upwork's Anti-Bot Defenses
Let me be upfront — Upwork is one of the hardest sites to scrape in 2026. They use:
- Cloudflare Bot Management (not just basic Cloudflare)
- Aggressive device fingerprinting
- Login walls for most freelancer profile data
- Dynamic JavaScript rendering
- IP reputation scoring
You will not get far with plain `requests` calls alone. You need either browser automation or a scraping API that handles the heavy lifting.
Step 1: Scrape Public Job Listings
Upwork's job search results are partially accessible without login. Here's the approach using ScraperAPI with JavaScript rendering:
import requests
from bs4 import BeautifulSoup
import json
import time
from urllib.parse import quote

# ScraperAPI credential used by every request helper below.
# Replace with your own key from the ScraperAPI dashboard before running.
SCRAPER_API_KEY = "YOUR_SCRAPERAPI_KEY"
def scrape_upwork_jobs(query: str, pages: int = 5) -> list[dict]:
    """Scrape Upwork job listings for a given search query.

    Args:
        query: Raw (unencoded) search keywords, e.g. "python developer".
        pages: Number of result pages to fetch, starting from page 1.

    Returns:
        A list of job dicts with keys: title, url, description (truncated
        to 500 chars), budget, posted, skills. Cards without a readable
        title are skipped.
    """
    jobs: list[dict] = []
    for page in range(1, pages + 1):
        # Target URL with the query percent-encoded once; ScraperAPI decodes
        # the outer `url` parameter before fetching it.
        target_url = (
            "https://www.upwork.com/nx/search/jobs/"
            f"?q={quote(query)}&page={page}&sort=recency"
        )
        try:
            # Use HTTPS — the original http:// endpoint sent the API key in
            # cleartext. `params=` lets requests encode the outer query string.
            response = requests.get(
                "https://api.scraperapi.com",
                params={
                    "api_key": SCRAPER_API_KEY,
                    "url": target_url,
                    "render": "true",
                    "country_code": "us",
                },
                timeout=90,
            )
        except requests.RequestException as exc:
            # A timeout on one page shouldn't abort the whole crawl.
            print(f"Page {page} failed: {exc}")
            continue
        if response.status_code != 200:
            print(f"Page {page} failed: {response.status_code}")
            continue
        soup = BeautifulSoup(response.text, "lxml")
        # Each result card is an <article data-test="JobTile"> element.
        for card in soup.select("article[data-test='JobTile']"):
            title_el = card.select_one("h2 a")
            desc_el = card.select_one("[data-test='job-description-text']")
            budget_el = card.select_one("[data-test='budget']")
            posted_el = card.select_one("[data-test='posted-on']")
            skills_els = card.select("[data-test='token'] span")
            job = {
                "title": title_el.text.strip() if title_el else None,
                "url": f"https://www.upwork.com{title_el['href']}" if title_el and title_el.get("href") else None,
                "description": desc_el.text.strip()[:500] if desc_el else None,
                "budget": budget_el.text.strip() if budget_el else None,
                "posted": posted_el.text.strip() if posted_el else None,
                "skills": [s.text.strip() for s in skills_els],
            }
            if job["title"]:  # skip cards the selectors couldn't parse
                jobs.append(job)
        print(f"Page {page}: {len(jobs)} total jobs")
        time.sleep(3)  # Respect rate limits
    return jobs
# Search for Python developer jobs
# Example run: fetch three pages and preview the first five listings.
jobs = scrape_upwork_jobs("python developer", pages=3)
for job in jobs[:5]:
    print(f"{job['title']} — {job['budget']}")
    print(f" Skills: {', '.join(job['skills'][:5])}")
Step 2: Scrape Freelancer Search Results
Public freelancer search pages show names, rates, skills, and job success scores:
def scrape_freelancers(skill: str, pages: int = 3) -> list[dict]:
    """Scrape freelancer profile cards from Upwork talent search.

    Args:
        skill: Raw (unencoded) skill keywords, e.g. "machine learning".
        pages: Number of result pages to fetch, starting from page 1.

    Returns:
        A list of dicts with keys: name, title, hourly_rate, job_success,
        total_earned, skills (text fields are None when absent on the card).
    """
    freelancers: list[dict] = []
    for page in range(1, pages + 1):
        target_url = (
            "https://www.upwork.com/nx/search/talent/"
            f"?q={quote(skill)}&page={page}"
        )
        try:
            # HTTPS so the API key isn't sent in cleartext; `params=` builds
            # the outer query string for us.
            response = requests.get(
                "https://api.scraperapi.com",
                params={
                    "api_key": SCRAPER_API_KEY,
                    "url": target_url,
                    "render": "true",
                },
                timeout=90,
            )
        except requests.RequestException as exc:
            print(f"Page {page} failed: {exc}")
            continue
        if response.status_code != 200:
            # Mirror scrape_upwork_jobs: the original parsed the error body
            # here, silently yielding zero cards with no diagnostic.
            print(f"Page {page} failed: {response.status_code}")
            continue
        soup = BeautifulSoup(response.text, "lxml")
        for card in soup.select("[data-test='freelancer-tile']"):
            name_el = card.select_one("[data-test='tile-name']")
            title_el = card.select_one("[data-test='tile-title']")
            rate_el = card.select_one("[data-test='rate']")
            score_el = card.select_one("[data-test='job-success']")
            earned_el = card.select_one("[data-test='earned-amount']")
            skills_els = card.select("[data-test='token'] span")
            freelancers.append({
                "name": name_el.text.strip() if name_el else None,
                "title": title_el.text.strip() if title_el else None,
                "hourly_rate": rate_el.text.strip() if rate_el else None,
                "job_success": score_el.text.strip() if score_el else None,
                "total_earned": earned_el.text.strip() if earned_el else None,
                "skills": [s.text.strip() for s in skills_els],
            })
        time.sleep(3)  # be polite between pages
    return freelancers
# Find top AI/ML freelancers
# Example run: show the first five profiles with rate and Job Success Score.
freelancers = scrape_freelancers("machine learning")
for f in freelancers[:5]:
    print(f"{f['name']} — {f['hourly_rate']} — {f['job_success']}")
Step 3: Analyze the Market
Once you have the data, you can do powerful analysis:
import statistics
def analyze_rates(freelancers: list[dict]) -> dict:
    """Analyze hourly rates for a skill category.

    Args:
        freelancers: Dicts carrying an "hourly_rate" string such as
            "$45.00/hr" (None or missing when the rate isn't visible).

    Returns:
        Summary stats: count, median, mean, min, max, p25, p75 — or
        {"error": "No rates found"} when no rate could be parsed.
    """
    rates: list[float] = []
    for f in freelancers:
        raw = f.get("hourly_rate")  # .get: original KeyError'd on missing key
        if not raw:
            continue
        try:
            # "$1,250.00/hr" -> 1250.0; commas previously caused a silent skip.
            rates.append(float(raw.replace("$", "").replace("/hr", "").replace(",", "")))
        except ValueError:
            continue  # non-numeric text like "N/A"
    if not rates:
        return {"error": "No rates found"}
    if len(rates) < 2:
        # statistics.quantiles() raises StatisticsError with a single point;
        # the sole rate is every percentile of itself.
        p25 = p75 = round(rates[0], 2)
    else:
        quartiles = statistics.quantiles(rates, n=4)  # compute once, not twice
        p25 = round(quartiles[0], 2)
        p75 = round(quartiles[2], 2)
    return {
        "count": len(rates),
        "median": statistics.median(rates),
        "mean": round(statistics.mean(rates), 2),
        "min": min(rates),
        "max": max(rates),
        "p25": p25,
        "p75": p75,
    }
def analyze_skills_demand(jobs: list[dict]) -> dict:
    """Find the 20 most in-demand skills from job listings.

    Args:
        jobs: Job dicts as produced by scrape_upwork_jobs (each may carry
            a "skills" list).

    Returns:
        Dict mapping skill -> number of listings, highest counts first,
        truncated to the top 20.
    """
    from collections import Counter  # stdlib; local import keeps the snippet self-contained

    # Counter replaces the hand-rolled dict-increment loop; most_common()
    # preserves the same highest-first, stable-tie ordering as the original
    # sorted(..., reverse=True).
    counts = Counter(skill for job in jobs for skill in job.get("skills", []))
    return dict(counts.most_common(20))
# Run analysis
# NOTE(review): analyze_rates() returns {"error": ...} when nothing parses;
# the prints below would then raise KeyError — guard before use in production.
rate_analysis = analyze_rates(freelancers)
print(f"ML Freelancer Rates:")
print(f" Median: ${rate_analysis['median']}/hr")
print(f" Range: ${rate_analysis['min']} - ${rate_analysis['max']}/hr")
print(f" 25th-75th percentile: ${rate_analysis['p25']} - ${rate_analysis['p75']}/hr")
demand = analyze_skills_demand(jobs)
print(f"\nTop Skills in Demand:")
for skill, count in list(demand.items())[:10]:
    print(f" {skill}: {count} jobs")
Using Playwright for More Control
If you need to interact with the page — scroll to load more results, click filters — use Playwright with residential proxies from ThorData:
from playwright.sync_api import sync_playwright
import time
def scrape_with_playwright(query: str, proxy_url: str) -> list[dict]:
    """Scrape Upwork using browser automation with a proxy.

    Args:
        query: Raw search keywords; URL-encoded internally.
        proxy_url: Proxy server URL, e.g. "http://user:pass@host:port".

    Returns:
        A list of {"title", "url"} dicts, one per job tile found.
    """
    from urllib.parse import quote  # this snippet only imports playwright/time at top level

    jobs: list[dict] = []
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            proxy={"server": proxy_url}
        )
        try:
            context = browser.new_context(
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0.0.0 Safari/537.36"
            )
            page = context.new_page()
            # Encode the query — a raw "python developer" would put an
            # unescaped space into the URL.
            page.goto(f"https://www.upwork.com/nx/search/jobs/?q={quote(query)}")
            # Wait for job tiles to load
            page.wait_for_selector("article[data-test='JobTile']", timeout=15000)
            # Scroll to trigger lazy loading
            for _ in range(3):
                page.evaluate("window.scrollBy(0, 800)")
                time.sleep(1)
            # Extract data
            for card in page.query_selector_all("article[data-test='JobTile']"):
                title = card.query_selector("h2 a")
                jobs.append({
                    "title": title.inner_text() if title else None,
                    "url": title.get_attribute("href") if title else None,
                })
        finally:
            # Always release the browser — the original leaked it whenever
            # wait_for_selector timed out or any step raised.
            browser.close()
    return jobs
What Works: A Comparison
Here's what I've found works for Upwork scraping in 2026:
| Method | Jobs | Profiles | Reliability |
|---|---|---|---|
| Raw requests | Blocked | Blocked | ~0% |
| ScraperAPI + render | Works | Partial | ~80% |
| Playwright + residential proxy | Works | Works | ~90% |
| Apify cloud actors | Works | Works | ~95% |
For production use, I'd recommend:
- ScraperAPI for quick extraction — handles Cloudflare, rotates IPs, renders JavaScript
- ThorData proxies paired with Playwright if you need full browser control
- Pre-built Apify actors if you want zero maintenance
Storing Results for Trend Analysis
Set up a SQLite database to track market trends over time:
import sqlite3
from datetime import datetime
def store_jobs(jobs: list[dict], db_path: str = "upwork_market.db"):
    """Persist scraped jobs to SQLite for longitudinal trend analysis.

    Args:
        jobs: Job dicts; "title" is required, budget/skills/posted optional.
        db_path: SQLite file path (created on first use).
    """
    from datetime import timezone  # local import keeps the snippet self-contained

    conn = sqlite3.connect(db_path)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS jobs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT,
                budget TEXT,
                skills TEXT,
                posted TEXT,
                scraped_at TEXT
            )
        """)
        # Store UTC: SQLite's datetime('now') used by get_trending_skills()
        # is UTC, so local-time datetime.now() skewed the trend window by
        # the machine's UTC offset.
        now_utc = datetime.now(timezone.utc).isoformat()
        conn.executemany(
            "INSERT INTO jobs (title, budget, skills, posted, scraped_at) VALUES (?, ?, ?, ?, ?)",
            [
                (job["title"], job.get("budget"), ", ".join(job.get("skills", [])),
                 job.get("posted"), now_utc)
                for job in jobs
            ],
        )
        conn.commit()
    finally:
        conn.close()  # original leaked the connection if an insert raised
    print(f"Stored {len(jobs)} jobs")
def get_trending_skills(db_path: str = "upwork_market.db", days: int = 7):
    """Count individual skills seen in jobs scraped over the past N days.

    Args:
        db_path: SQLite database written by store_jobs().
        days: Look-back window in days (compared against UTC timestamps).

    Returns:
        Up to 20 (skill, job_count) tuples, highest counts first.
    """
    from collections import Counter  # stdlib; local import keeps the snippet self-contained

    conn = sqlite3.connect(db_path)
    try:
        # Create the table if store_jobs() hasn't run yet, so a fresh
        # database returns [] instead of raising OperationalError.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS jobs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT,
                budget TEXT,
                skills TEXT,
                posted TEXT,
                scraped_at TEXT
            )
        """)
        cursor = conn.execute(
            "SELECT skills FROM jobs WHERE scraped_at >= datetime('now', ?)",
            (f'-{days} days',),
        )
        # The skills column is a ", "-joined string (see store_jobs). The
        # original GROUP BY skills counted whole skill *combinations*, so
        # "Python, Django" and "Python" never aggregated together.
        counts = Counter()
        for (skills_str,) in cursor:
            if skills_str:
                counts.update(s.strip() for s in skills_str.split(","))
    finally:
        conn.close()
    return counts.most_common(20)
Legal and Ethical Considerations
Upwork's ToS explicitly prohibits scraping. Be aware of the implications:
- Publicly accessible search results have fared better in court (see hiQ v. LinkedIn), but the law here is far from settled — Upwork may still send cease-and-desist letters or ban associated accounts
- Never scrape private messages, contracts, or earnings data — that's clearly off-limits
- Don't use scraped data to spam freelancers or clients
- Rate limit your requests — hammering their servers can get your IP permanently banned
- This guide is for educational purposes and legitimate market research
Conclusion
Scraping Upwork gives you powerful insights into the freelance market — rate trends, skill demand, and competitive intelligence that would take hours to gather manually. The anti-bot protection is tough, but tools like ScraperAPI and ThorData residential proxies make it manageable.
For the easiest path, check out ready-made scrapers on Apify that handle all the edge cases automatically.
Questions about scraping Upwork or other freelance platforms? Let me know in the comments!
Top comments (0)