DEV Community

agenthustler
agenthustler

Posted on

Scraping Academic Conference Acceptance Rates with Python

Academic conference acceptance rates are a key metric for researchers choosing where to submit. This data is scattered across individual conference sites — let's build a comprehensive tracker.

Why Acceptance Rates Matter

A conference's acceptance rate signals prestige, competition level, and review rigor. Tracking over time reveals which fields are growing and where new researchers have the best odds.

Scraping Conference Data

import re
from urllib.parse import urlencode

import pandas as pd
import requests
from bs4 import BeautifulSoup

API_KEY = "YOUR_SCRAPERAPI_KEY"  # Get one at https://www.scraperapi.com?fp_ref=the52

def scrape_conference_stats(conference_url):
    """Fetch a conference page through ScraperAPI and extract acceptance stats.

    Args:
        conference_url: Full URL of the conference page to scrape.

    Returns:
        dict containing whichever of 'submissions', 'accepted' and
        'acceptance_rate' (percent) could be extracted from the page text;
        an empty dict when nothing matched.

    Raises:
        requests.HTTPError: if the proxy request fails.
    """
    # Pass the target URL via `params` so requests percent-encodes it.
    # Hand-interpolating it into the query string would let any '&' or '?'
    # inside conference_url be consumed as ScraperAPI parameters.
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": API_KEY, "url": conference_url},
        timeout=60,
    )
    response.raise_for_status()  # fail loudly instead of parsing an error page
    soup = BeautifulSoup(response.text, 'html.parser')

    text = soup.get_text()
    stats = {}

    # Loose, case-insensitive patterns: conference sites phrase these
    # numbers inconsistently.
    submissions_match = re.search(r'(\d+)\s*submissions?', text, re.IGNORECASE)
    accepted_match = re.search(r'(\d+)\s*(?:accepted|papers?\s*accepted)', text, re.IGNORECASE)
    rate_match = re.search(r'acceptance\s*rate[:\s]*(\d+\.?\d*)\s*%', text, re.IGNORECASE)

    if submissions_match:
        stats['submissions'] = int(submissions_match.group(1))
    if accepted_match:
        stats['accepted'] = int(accepted_match.group(1))
    if rate_match:
        stats['acceptance_rate'] = float(rate_match.group(1))
    elif stats.get('submissions') and 'accepted' in stats:
        # Derive the rate only when submissions is non-zero, so a page that
        # happens to match "0 submissions" cannot raise ZeroDivisionError.
        stats['acceptance_rate'] = round(stats['accepted'] / stats['submissions'] * 100, 1)

    return stats

# Conferences to track; extend this list as needed.
conferences = [
    {'name': 'NeurIPS 2025', 'url': 'https://neurips.cc/Conferences/2025'},
    {'name': 'ICML 2025', 'url': 'https://icml.cc/Conferences/2025'},
    {'name': 'CVPR 2025', 'url': 'https://cvpr.thecvf.com/Conferences/2025'},
    {'name': 'ACL 2025', 'url': 'https://2025.aclweb.org/'},
]

results = []
for conf in conferences:
    # Best-effort: one unreachable or malformed site should not abort
    # the whole run — report the failure and move on.
    try:
        stats = scrape_conference_stats(conf['url'])
    except Exception as exc:
        print(f"{conf['name']}: failed ({exc})")
        continue
    stats['conference'] = conf['name']
    results.append(stats)
    print(f"{conf['name']}: {stats}")

# Persist whatever was collected; columns are the union of extracted keys.
df = pd.DataFrame(results)
df.to_csv('conference_acceptance_rates.csv', index=False)
Enter fullscreen mode Exit fullscreen mode

Scraping WikiCFP for Historical Data

def scrape_wikicfp(search_term):
    """Search WikiCFP for calls-for-papers matching *search_term*.

    Args:
        search_term: Free-text query (spaces allowed; it is URL-encoded).

    Returns:
        list of dicts with 'conference', 'full_name', 'deadline' and
        'location' keys, one per result row.

    Raises:
        requests.HTTPError: if the proxy request fails.
    """
    # Build the WikiCFP URL with urlencode, then hand the WHOLE thing to
    # requests via `params` so it gets percent-encoded. The original
    # hand-built string left '&year=a' to be parsed as a ScraperAPI
    # parameter, silently dropping WikiCFP's year filter.
    target = "http://www.wikicfp.com/cfp/servlet/tool.search?" + urlencode(
        {"q": search_term, "year": "a"}
    )
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": API_KEY, "url": target},
        timeout=60,
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    cfps = []
    for row in soup.select('table.contsec tr'):
        cells = row.select('td')
        # Result rows have at least 4 cells; header/spacer rows do not.
        if len(cells) >= 4:
            link = cells[0].select_one('a')
            cfps.append({
                'conference': link.text.strip() if link else '',
                'full_name': cells[1].text.strip(),
                'deadline': cells[2].text.strip(),
                'location': cells[3].text.strip()
            })
    return cfps

# Example: pull every machine-learning CFP that WikiCFP knows about.
ml_conferences = scrape_wikicfp('machine learning')
cfp_count = len(ml_conferences)
print(f"Found {cfp_count} ML conference CFPs")
Enter fullscreen mode Exit fullscreen mode

Trend Analysis

If you scrape many academic sites at scale, a rotating-proxy service such as ThorData can help you avoid rate limits. Once you have historical data, plot how each conference's acceptance rate has changed over time:

import matplotlib.pyplot as plt

historical = pd.read_csv('conference_acceptance_rates.csv')

# The scraper above writes no 'year' column, so sort_values('year') would
# raise KeyError on its output. When the column is missing, derive it from
# conference names like "NeurIPS 2025"; rows without a 4-digit year get NA.
if 'year' not in historical.columns:
    historical['year'] = (
        historical['conference'].str.extract(r'(\d{4})', expand=False).astype('Int64')
    )

# One line per conference, points ordered chronologically.
for conf in historical['conference'].unique():
    data = historical[historical['conference'] == conf].sort_values('year')
    plt.plot(data['year'], data['acceptance_rate'], marker='o', label=conf)

plt.xlabel('Year')
plt.ylabel('Acceptance Rate (%)')
plt.title('Conference Acceptance Rates Over Time')
plt.legend(bbox_to_anchor=(1.05, 1))  # place legend outside the axes
plt.tight_layout()
plt.savefig('acceptance_trends.png', dpi=150)
plt.close()  # release the figure so repeated runs don't accumulate axes
Enter fullscreen mode Exit fullscreen mode

Insights

  • Top ML conferences dropped from ~25% to ~15% acceptance in 5 years
  • Workshop papers have 40-60% acceptance rates
  • Regional conferences offer 30-40% — great for early-career researchers
  • For ongoing monitoring, a scheduler such as ScrapeOps can run these scrapers regularly, while ScraperAPI handles the varied tech stacks of individual conference sites

Top comments (0)