Introduction
University rankings from QS, Times Higher Education, and US News shift every year — influencing student decisions, institutional funding, and academic reputation. Building a ranking tracker lets you monitor changes, spot trends, and compare institutions systematically.
In this guide, we'll create a Python scraper that tracks university rankings across multiple sources.
Project Setup
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
from datetime import datetime
import time
# Ranking sites use JavaScript rendering and anti-bot protection, so requests
# are routed through a scraping proxy API that renders pages server-side.
# Set your own account's API key here before running the pipeline.
SCRAPER_API_KEY = "your_key_here"
BASE_URL = "http://api.scraperapi.com"
Scraping QS World Rankings
QS publishes rankings with detailed methodology scores:
def scrape_qs_rankings(year=2026):
    """Scrape QS World University Rankings for one edition year.

    Args:
        year: Ranking edition year (defaults to 2026).

    Returns:
        A list of dicts with keys ``rank``, ``name``, ``country``,
        ``overall_score`` (all raw strings from the page), plus
        ``source``, ``year``, and ``scraped_at``.

    Raises:
        requests.HTTPError: if the proxy API responds with a non-2xx status.
        requests.Timeout: if the request exceeds the timeout.
    """
    url = f"https://www.topuniversities.com/university-rankings/world-university-rankings/{year}"
    params = {
        "api_key": SCRAPER_API_KEY,
        "url": url,
        "render": "true",  # the ranking table is built client-side in JS
    }
    # Timeout prevents an indefinite hang; raise_for_status fails loudly
    # instead of silently parsing a proxy error page as an empty result.
    response = requests.get(BASE_URL, params=params, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    universities = []
    for row in soup.select(".uni-row"):
        cells = {
            "rank": row.select_one(".rank"),
            "name": row.select_one(".uni-name"),
            "country": row.select_one(".country"),
            "overall_score": row.select_one(".overall-score"),
        }
        # Skip partially rendered rows rather than crashing on a missing cell
        # (the original called .text on a possibly-None select_one result).
        if any(cell is None for cell in cells.values()):
            continue
        uni = {key: cell.text.strip() for key, cell in cells.items()}
        uni["source"] = "QS"
        uni["year"] = year
        uni["scraped_at"] = datetime.now().isoformat()
        universities.append(uni)
    return universities
Scraping Times Higher Education Rankings
THE rankings use a different methodology and scoring:
def scrape_the_rankings(year=2026):
    """Scrape Times Higher Education World University Rankings.

    Args:
        year: Ranking edition year (defaults to 2026).

    Returns:
        A list of dicts with keys ``rank``, ``name``, ``country``,
        ``teaching_score``, ``research_score`` (raw page strings),
        plus ``source``, ``year``, and ``scraped_at``.

    Raises:
        requests.HTTPError: if the proxy API responds with a non-2xx status.
        requests.Timeout: if the request exceeds the timeout.
    """
    url = f"https://www.timeshighereducation.com/world-university-rankings/{year}/world-ranking"
    params = {
        "api_key": SCRAPER_API_KEY,
        "url": url,
        "render": "true",  # table rows are populated client-side
    }
    # Timeout prevents an indefinite hang; raise_for_status surfaces
    # proxy/site errors instead of returning an empty list silently.
    response = requests.get(BASE_URL, params=params, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    universities = []
    for row in soup.select("table.ranking-table tbody tr"):
        cols = row.select("td")
        # Rows without all five data cells (headers, ads) are skipped.
        if len(cols) < 5:
            continue
        universities.append({
            "rank": cols[0].text.strip(),
            "name": cols[1].text.strip(),
            "country": cols[2].text.strip(),
            "teaching_score": cols[3].text.strip(),
            "research_score": cols[4].text.strip(),
            "source": "THE",
            "year": year,
            "scraped_at": datetime.now().isoformat(),
        })
    return universities
Subject-Specific Rankings
def scrape_subject_rankings(subject="computer-science", year=2026):
    """Scrape QS subject-specific rankings for one academic field.

    Args:
        subject: Short subject key (one of the aliases below) or a raw
            QS URL slug, which is passed through unchanged.
        year: Ranking edition year (defaults to 2026, matching the
            previously hard-coded URL).

    Returns:
        A list of dicts with keys ``rank``, ``university``, ``subject``,
        and ``score`` (all raw strings from the page).

    Raises:
        requests.HTTPError: if the proxy API responds with a non-2xx status.
        requests.Timeout: if the request exceeds the timeout.
    """
    # Friendly aliases for the longer QS URL slugs; unknown keys fall
    # through as literal slugs via dict.get's default.
    subjects = {
        "computer-science": "computer-science-and-information-systems",
        "engineering": "engineering-and-technology",
        "business": "business-and-management-studies",
        "medicine": "medicine",
    }
    slug = subjects.get(subject, subject)
    url = (
        "https://www.topuniversities.com/university-rankings/"
        f"university-subject-rankings/{year}/{slug}"
    )
    params = {"api_key": SCRAPER_API_KEY, "url": url, "render": "true"}
    response = requests.get(BASE_URL, params=params, timeout=60)
    response.raise_for_status()  # don't parse a proxy error page as data
    soup = BeautifulSoup(response.text, "html.parser")
    results = []
    for row in soup.select(".uni-row"):
        rank = row.select_one(".rank")
        name = row.select_one(".uni-name")
        score = row.select_one(".overall-score")
        # Skip incomplete rows instead of raising AttributeError on None.
        if rank is None or name is None or score is None:
            continue
        results.append({
            "rank": rank.text.strip(),
            "university": name.text.strip(),
            "subject": subject,
            "score": score.text.strip(),
        })
    return results
Storing and Comparing Rankings
def init_database(db_path="rankings.db"):
    """Open (creating if needed) the SQLite ranking-history database.

    Args:
        db_path: Path to the SQLite file (defaults to "rankings.db").

    Returns:
        An open sqlite3.Connection with the ``rankings`` table present.
    """
    connection = sqlite3.connect(db_path)
    schema = """
        CREATE TABLE IF NOT EXISTS rankings (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            rank INTEGER,
            university TEXT,
            country TEXT,
            score REAL,
            source TEXT,
            year INTEGER,
            scraped_at TEXT
        )
    """
    connection.execute(schema)
    connection.commit()
    return connection
def compare_year_over_year(university, db_path="rankings.db"):
    """Report how a university's rank moved between first and last record.

    Prints one summary line per ranking source comparing the earliest and
    latest stored rank (lower rank numbers are better, so a negative delta
    means the university improved).

    Args:
        university: Substring matched (SQL LIKE) against stored names.
        db_path: SQLite database file (defaults to "rankings.db").

    Returns:
        pandas.DataFrame of (year, source, rank, score) rows for the
        match, ordered by source then year; may be empty.
    """
    conn = sqlite3.connect(db_path)
    try:
        query = """
            SELECT year, source, rank, score
            FROM rankings
            WHERE university LIKE ?
            ORDER BY source, year
        """
        # Parameterized query — the user-supplied name never enters the SQL.
        df = pd.read_sql(query, conn, params=[f"%{university}%"])
    finally:
        # Close the connection even if the query fails (original leaked it).
        conn.close()
    for source in df["source"].unique():
        subset = df[df["source"] == source]
        change = subset["rank"].iloc[-1] - subset["rank"].iloc[0]
        # Original reported a zero delta as "declined by 0 positions".
        if change == 0:
            print(f"{source}: unchanged")
        else:
            direction = "improved" if change < 0 else "declined"
            print(f"{source}: {direction} by {abs(change)} positions")
    return df
Automated Tracking Pipeline
def _normalize_for_storage(records):
    """Map scraper output onto the rankings table schema.

    The scrapers emit a ``name`` key and source-specific score keys
    (``overall_score`` for QS), while the SQLite table uses ``university``
    and a single ``score`` column. The original code appended the raw
    DataFrame, which fails on the column mismatch; here columns are renamed
    and any column the table does not have is dropped.
    """
    df = pd.DataFrame(records)
    if df.empty:
        return df
    df = df.rename(columns={"name": "university", "overall_score": "score"})
    table_columns = ["rank", "university", "country", "score",
                     "source", "year", "scraped_at"]
    return df[[c for c in table_columns if c in df.columns]]


def run_tracking_pipeline():
    """Collect QS and THE rankings and append them to the SQLite history.

    Side effects: creates/updates rankings.db in the working directory and
    prints progress to stdout.
    """
    conn = init_database()
    try:
        print("Collecting QS rankings...")
        qs_data = scrape_qs_rankings()
        _normalize_for_storage(qs_data).to_sql(
            "rankings", conn, if_exists="append", index=False)
        time.sleep(5)  # polite pause between the two ranking sites
        print("Collecting THE rankings...")
        the_data = scrape_the_rankings()
        _normalize_for_storage(the_data).to_sql(
            "rankings", conn, if_exists="append", index=False)
        print(f"Stored {len(qs_data) + len(the_data)} rankings")
    finally:
        # Close the connection even if a scrape or insert fails
        # (the original leaked it on any exception).
        conn.close()


if __name__ == "__main__":
    run_tracking_pipeline()
Conclusion
A university ranking tracker provides valuable data for students, researchers, and institutions. By collecting data from multiple sources and tracking changes over time, you can identify trends that single-year snapshots miss. Use a rendering-capable scraping proxy to handle the JavaScript rendering and anti-bot protections these sites employ.
Top comments (0)