Why Semantic Scholar?
While OpenAlex and Crossref give you raw metadata, Semantic Scholar adds something they can't: AI-powered paper recommendations and TLDR summaries generated by their S2 models.
Built by the Allen Institute for AI, it indexes 200M+ papers and provides:
- AI-generated TLDRs for papers
- Citation intent classification (is this paper supporting, contrasting, or just mentioning?)
- Paper recommendations ("papers like this one")
- Author disambiguation
- Free API key with 100 requests per 5 minutes
Get Your Free API Key
- Go to semanticscholar.org/product/api
- Sign up (free)
- Get your API key
Or use without a key (lower rate limits).
Search Papers
# HTTP client used by every snippet below.
import requests

# Optional but recommended: authenticated requests get a dedicated rate limit.
API_KEY = "your-key-here"  # Optional but recommended

# Only attach the x-api-key header once a real key has been filled in;
# the placeholder value means "run unauthenticated".
if API_KEY != "your-key-here":
    headers = {"x-api-key": API_KEY}
else:
    headers = {}
def search(query, limit=5):
    """Search Semantic Scholar and print a short summary of each hit.

    Args:
        query: Free-text search query.
        limit: Maximum number of results to print (default 5).

    Raises:
        requests.HTTPError: if the API returns an error status (e.g. rate limit).
    """
    resp = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": limit,
            "fields": "title,year,citationCount,tldr,openAccessPdf,authors",
        },
        headers=headers,
        timeout=30,  # don't hang forever on a stalled connection
    )
    # Fail loudly on 4xx/5xx instead of silently iterating an error payload.
    resp.raise_for_status()
    for paper in resp.json().get("data", []):
        print(f"[{paper.get('year')}] {paper['title']}")
        # citationCount can be absent for very new papers — default to 0.
        print(f" Citations: {paper.get('citationCount', 0)}")
        if paper.get("tldr"):
            print(f" TLDR: {paper['tldr']['text']}")
        if paper.get("openAccessPdf"):
            print(f" PDF: {paper['openAccessPdf']['url']}")
        authors = ", ".join(a["name"] for a in paper.get("authors", [])[:3])
        print(f" Authors: {authors}")
        print()

search("retrieval augmented generation")
Get AI-Generated Paper Summaries
def paper_details(paper_id):
    """Get full details including TLDR and references.

    Args:
        paper_id: Semantic Scholar paper ID (or an external ID such as
            a "DOI:"-prefixed DOI, as used elsewhere in this article).

    Raises:
        requests.HTTPError: if the API returns an error status.
    """
    fields = "title,year,abstract,tldr,citationCount,referenceCount,openAccessPdf,fieldsOfStudy"
    resp = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}",
        params={"fields": fields},
        headers=headers,
        timeout=30,  # don't hang forever on a stalled connection
    )
    # Surface 404s / rate-limit errors instead of KeyError-ing on the error body.
    resp.raise_for_status()
    paper = resp.json()
    print(f"Title: {paper['title']}")
    print(f"Year: {paper.get('year')}")
    # Counts can be missing for very new papers — default to 0 rather than crash.
    print(f"Citations: {paper.get('citationCount', 0)} | References: {paper.get('referenceCount', 0)}")
    if paper.get("tldr"):
        print(f"\nTLDR: {paper['tldr']['text']}")
    if paper.get("abstract"):
        print(f"\nAbstract: {paper['abstract'][:300]}...")
    if paper.get("fieldsOfStudy"):
        print(f"\nFields: {', '.join(paper['fieldsOfStudy'])}")

# "Attention Is All You Need"
paper_details("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Find Similar Papers (AI Recommendations)
def recommendations(paper_id, limit=5):
    """Get AI-recommended similar papers.

    Args:
        paper_id: Semantic Scholar paper ID to find similar papers for.
        limit: Maximum number of recommendations to print (default 5).

    Raises:
        requests.HTTPError: if the API returns an error status.
    """
    resp = requests.get(
        f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}",
        params={"limit": limit, "fields": "title,year,citationCount,tldr"},
        headers=headers,
        timeout=30,  # don't hang forever on a stalled connection
    )
    # Fail loudly on 4xx/5xx instead of printing an empty list.
    resp.raise_for_status()
    print("Recommended papers:")
    for paper in resp.json().get("recommendedPapers", []):
        # citationCount can be absent for very new papers — default to 0.
        print(f" [{paper.get('year')}] {paper['title']} ({paper.get('citationCount', 0)} cites)")
        if paper.get("tldr"):
            print(f" TLDR: {paper['tldr']['text'][:150]}...")
        print()

recommendations("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Author Profiles with h-index
def author_search(name):
    """Search authors by name and print stats (h-index, papers, citations) for the top 3.

    Args:
        name: Author name to search for.

    Raises:
        requests.HTTPError: if the API returns an error status.
    """
    resp = requests.get(
        "https://api.semanticscholar.org/graph/v1/author/search",
        params={
            "query": name,
            "fields": "name,hIndex,citationCount,paperCount,affiliations",
            "limit": 3,
        },
        headers=headers,
        timeout=30,  # don't hang forever on a stalled connection
    )
    # Surface rate-limit / auth errors instead of silently printing nothing.
    resp.raise_for_status()
    for author in resp.json().get("data", []):
        print(f"{author['name']}")
        print(f" h-index: {author.get('hIndex', 'N/A')}")
        print(f" Papers: {author.get('paperCount', 'N/A')}")
        print(f" Citations: {author.get('citationCount', 'N/A')}")
        if author.get("affiliations"):
            print(f" Affiliations: {', '.join(author['affiliations'])}")
        print()

author_search("Ilya Sutskever")
Citation Intent (Why Papers Cite Each Other)
One of Semantic Scholar's unique features: it classifies WHY a paper cites another.
def citation_context(paper_id, limit=5):
    """Print citing papers along with the classified intent and a context snippet.

    Args:
        paper_id: Semantic Scholar paper ID whose incoming citations to inspect.
        limit: Maximum number of citations to print (default 5).

    Raises:
        requests.HTTPError: if the API returns an error status.
    """
    resp = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/citations",
        params={"fields": "title,citationCount,contexts,intents", "limit": limit},
        headers=headers,
        timeout=30,  # don't hang forever on a stalled connection
    )
    # Fail loudly on 4xx/5xx instead of iterating an error payload.
    resp.raise_for_status()
    for cite in resp.json().get("data", []):
        # Each entry wraps the citing paper plus contexts/intents metadata.
        paper = cite.get("citingPaper", {})
        print(f"{paper.get('title', 'N/A')}")
        if cite.get("intents"):
            print(f" Intent: {', '.join(cite['intents'])}")
        if cite.get("contexts"):
            print(f" Context: {cite['contexts'][0][:150]}...")
        print()

citation_context("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Comparison: Semantic Scholar vs Others
| Feature | Semantic Scholar | OpenAlex | Crossref |
|---|---|---|---|
| Papers | 200M+ | 250M+ | 140M+ |
| AI TLDRs | Yes | No | No |
| Recommendations | Yes | No | No |
| Citation Intent | Yes | No | No |
| API Key | Free (recommended) | Not needed | Not needed |
| Abstracts | Usually | Sometimes | Rarely |
| Open Access PDFs | Direct links | Via Unpaywall | No |
| Best For | AI/NLP research, recommendations | Discovery, metrics | DOI metadata |
Build a Complete Research Pipeline
Use all three together:
# 1. Discover papers (OpenAlex — broadest coverage)
# 2. Get AI summaries + recommendations (Semantic Scholar)
# 3. Get canonical metadata (Crossref)
def research_pipeline(query):
    """Chain OpenAlex (discovery) -> Semantic Scholar (TLDR) -> Crossref (metadata).

    Args:
        query: Free-text search query for the discovery step.
    """
    # Step 1: Find papers
    discovery = requests.get(
        "https://api.openalex.org/works",
        params={"search": query, "sort": "cited_by_count:desc", "per_page": 3},
        timeout=30,
    ).json()
    for work in discovery.get("results", []):
        # OpenAlex can return "doi": null — the `or ""` guards the .replace()
        # call, which would otherwise raise AttributeError on None.
        doi = (work.get("doi") or "").replace("https://doi.org/", "")
        if not doi:
            continue
        # Step 2: Get AI summary
        s2 = requests.get(
            f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}",
            params={"fields": "tldr,title"},
            headers=headers,
            timeout=30,
        ).json()
        print(f"\n{s2.get('title', work['title'])}")
        if s2.get("tldr"):
            print(f" AI Summary: {s2['tldr']['text']}")
        # Step 3: Get metadata
        cr = requests.get(f"https://api.crossref.org/works/{doi}", timeout=30).json()
        if "message" in cr:
            msg = cr["message"]
            # container-title may be an empty list (IndexError with the naive
            # [0]) and is-referenced-by-count may be absent — guard both.
            journal = (msg.get("container-title") or ["N/A"])[0]
            print(f" Journal: {journal}")
            print(f" Citations: {msg.get('is-referenced-by-count', 0)}")

research_pipeline("transformer attention mechanism")
What's your favorite tool for literature research? I'm building a collection of free academic APIs — PRs welcome!
I write practical API tutorials weekly. Follow for more.
More from me: 10 Dev Tools I Use Daily | 77 Scrapers on a Schedule | 150+ Free APIs
Top comments (0)