DEV Community

agenthustler
agenthustler

Posted on

How to Scrape Freelancer Profiles for Market Rate Research

Understanding freelancer market rates helps with hiring budgets, salary negotiations, and spotting emerging skills. This tutorial walks through scraping freelancer profiles from public platforms to build a market rate database.

What We'll Build

A scraper that collects:

  • Hourly rates by skill category
  • Experience levels and specializations
  • Geographic rate variations
  • Skill demand signals

Setup

pip install requests beautifulsoup4 pandas numpy

We'll use ScraperAPI since freelancer platforms have strong anti-scraping protections.

The Profile Scraper

import requests
from bs4 import BeautifulSoup
import re
import time
from datetime import datetime

# ScraperAPI credential — replace the placeholder with your own key before running.
SCRAPER_API_KEY = "YOUR_KEY"

def scrape_freelancer_listings(skill, page=1):
    """Scrape Freelancer.com freelancer listings for a given skill.

    Args:
        skill: Skill slug as it appears in Freelancer.com URLs (e.g. "python").
        page: 1-based results page number.

    Returns:
        List of profile dicts with name, hourly_rate, rating, country,
        skills, platform, and the search skill that produced the hit.

    Raises:
        requests.HTTPError: if the proxy API returns an error status
            (quota exhausted, bad key, blocked target).
    """
    url = f"https://www.freelancer.com/freelancers/{skill}/{page}"
    # Let requests build the query string so the target URL is
    # percent-encoded properly instead of being spliced in by hand
    # (hand-splicing breaks as soon as the target URL carries ? or &).
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": SCRAPER_API_KEY, "url": url, "render": "true"},
        timeout=60,
    )
    response.raise_for_status()  # fail fast instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")

    profiles = []
    for card in soup.select(".freelancer-card"):
        name = card.select_one(".freelancer-name")
        rate = card.select_one(".hourly-rate")
        rating_el = card.select_one(".rating-score")
        country = card.select_one(".country")
        skills_el = card.select(".skill-tag")

        # Name and rate are mandatory; rating/country/skills are optional.
        if name and rate:
            profiles.append({
                "name": name.text.strip(),
                "hourly_rate": parse_rate(rate.text.strip()),
                "rating": float(rating_el.text.strip()) if rating_el else None,
                "country": country.text.strip() if country else "Unknown",
                "skills": [s.text.strip() for s in skills_el],
                "platform": "freelancer",  # keep records consistent with the Guru scraper
                "search_skill": skill
            })

    return profiles

def parse_rate(rate_str):
    """Parse a rate string like '$50/hr' or '$1,250.50/hr' into a float.

    Returns 0.0 when no numeric value is present, so callers can filter
    out profiles that hide their rate.
    """
    # Require at least one digit: the old pattern [\d.]+ could match a
    # bare '.', and float('.') raises ValueError.
    match = re.search(r'\d+(?:\.\d+)?', rate_str.replace(",", ""))
    return float(match.group()) if match else 0.0

Multi-Platform Collection

def scrape_guru_profiles(skill, page=1):
    """Scrape Guru.com freelancer listings.

    Returns a list of profile dicts (name, hourly_rate, country,
    platform, search_skill) for the given skill slug and page number.
    """
    url = f"https://www.guru.com/d/freelancers/skill/{skill}/pg/{page}/"
    api_url = (
        f"http://api.scraperapi.com?api_key={SCRAPER_API_KEY}"
        f"&url={url}&render=true"
    )

    html = requests.get(api_url, timeout=60).text
    soup = BeautifulSoup(html, "html.parser")

    results = []
    for listing in soup.select(".freelancerListing"):
        name_el = listing.select_one(".freelancerName")
        if not name_el:
            # A card without a name is unusable — skip it.
            continue

        rate_el = listing.select_one(".rateValue")
        loc_el = listing.select_one(".location")

        results.append({
            "name": name_el.text.strip(),
            "hourly_rate": parse_rate(rate_el.text) if rate_el else 0,
            "country": loc_el.text.strip() if loc_el else "Unknown",
            "platform": "guru",
            "search_skill": skill,
        })

    return results

def collect_market_data(skills, pages_per_skill=3):
    """Collect freelancer data across skills and platforms.

    Iterates every skill over `pages_per_skill` pages on both
    Freelancer.com and Guru.com, sleeping between requests to stay
    polite toward the proxy API.
    """
    collected = []

    for skill in skills:
        for page in range(1, pages_per_skill + 1):
            print(f"Scraping {skill} - page {page}")

            collected.extend(scrape_freelancer_listings(skill, page))
            time.sleep(3)

            collected.extend(scrape_guru_profiles(skill, page))
            time.sleep(3)

    return collected

Rate Analysis

import pandas as pd
import numpy as np

def analyze_rates(profiles):
    """Analyze market rates from collected profiles.

    Args:
        profiles: List of profile dicts with at least "hourly_rate",
            "search_skill", and "country" keys.

    Returns:
        DataFrame of profiles with a positive hourly rate (zero-rate
        rows — profiles without a parseable rate — are dropped).
    """
    df = pd.DataFrame(profiles)

    # Guard the empty case: pd.DataFrame([]) has no columns, so the
    # column filters below would raise KeyError.
    if df.empty:
        print("No profiles collected.")
        return df

    df = df[df["hourly_rate"] > 0]

    print("=== Market Rate Summary ===\n")

    for skill in df["search_skill"].unique():
        skill_data = df[df["search_skill"] == skill]["hourly_rate"]
        print(f"{skill}:")
        print(f"  Median: ${skill_data.median():.0f}/hr")
        print(f"  Mean:   ${skill_data.mean():.0f}/hr")
        print(f"  Range:  ${skill_data.min():.0f} - ${skill_data.max():.0f}/hr")
        print(f"  25th:   ${skill_data.quantile(0.25):.0f}/hr")
        print(f"  75th:   ${skill_data.quantile(0.75):.0f}/hr")
        print(f"  Sample: {len(skill_data)} profiles\n")

    print("\n=== Rates by Country (Top 10) ===\n")
    # Only rank countries with at least 5 samples to avoid noisy medians.
    geo = df.groupby("country")["hourly_rate"].agg(["median", "count"])
    geo = geo[geo["count"] >= 5].sort_values("median", ascending=False)
    print(geo.head(10))

    return df

# Example run. NOTE: collection is slow by design — 5 skills x 3 pages x
# 2 platforms with ~3 s sleeps means several minutes before analysis runs.
skills = ["python", "react", "machine-learning", "devops", "data-science"]
profiles = collect_market_data(skills, pages_per_skill=3)
df = analyze_rates(profiles)

Proxy Strategy

Freelancer platforms block aggressively. ScraperAPI with JS rendering handles most cases. For higher volumes, pair with ThorData residential proxies. Monitor success rates using ScrapeOps.

Ethical Considerations

Only scrape publicly visible profiles. Don't store personal contact information. Aggregate data for market research and never target individuals. Respect rate limits and robots.txt.

Conclusion

Automated market rate research gives you data-driven insights for hiring, pricing, and career planning. Run this monthly to track how rates shift across skills and regions.

Top comments (0)