DEV Community

Alex Spinov
Alex Spinov

Posted on

Semantic Scholar API: Search 200M+ Papers with AI-Powered Recommendations (Free Key)

Why Semantic Scholar?

While OpenAlex and Crossref give you raw metadata, Semantic Scholar adds something they can't: AI-powered paper recommendations and TLDR summaries generated by their S2 models.

Built by the Allen Institute for AI, it indexes 200M+ papers and provides:

  • AI-generated TLDRs for papers
  • Citation intent classification (is this paper supporting, contrasting, or just mentioning?)
  • Paper recommendations ("papers like this one")
  • Author disambiguation
  • Free API key with 100 requests per 5 minutes

Get Your Free API Key

  1. Go to semanticscholar.org/product/api
  2. Sign up (free)
  3. Get your API key

Or use without a key (lower rate limits).

Search Papers

import requests

API_KEY = "your-key-here"  # Optional but recommended
# Send the x-api-key header only when a real key has been configured;
# keyless requests still work, just with a lower rate limit.
headers = {"x-api-key": API_KEY} if API_KEY != "your-key-here" else {}

def search(query, limit=5, timeout=30):
    """Search Semantic Scholar and print title, citations, TLDR, PDF, authors.

    Args:
        query: Free-text search string.
        limit: Maximum number of results to print (default 5).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": limit,
            "fields": "title,year,citationCount,tldr,openAccessPdf,authors",
        },
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    for paper in resp.json().get("data", []):
        print(f"[{paper.get('year')}] {paper['title']}")
        print(f"  Citations: {paper.get('citationCount', 0)}")
        if paper.get("tldr"):
            print(f"  TLDR: {paper['tldr']['text']}")
        if paper.get("openAccessPdf"):
            print(f"  PDF: {paper['openAccessPdf']['url']}")
        # Show at most the first three authors to keep the output compact.
        authors = ", ".join(a["name"] for a in paper.get("authors", [])[:3])
        print(f"  Authors: {authors}")
        print()

search("retrieval augmented generation")
Enter fullscreen mode Exit fullscreen mode

Get AI-Generated Paper Summaries

def paper_details(paper_id, timeout=30):
    """Print full details for one paper: TLDR, abstract, counts, fields of study.

    Args:
        paper_id: Semantic Scholar paper ID (also accepts DOI:/ARXIV: prefixes).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the paper is not found or the API errors.
    """
    fields = "title,year,abstract,tldr,citationCount,referenceCount,openAccessPdf,fieldsOfStudy"
    resp = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}",
        params={"fields": fields},
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # surface 404s instead of printing an error body
    paper = resp.json()

    print(f"Title: {paper['title']}")
    print(f"Year: {paper.get('year')}")
    print(f"Citations: {paper.get('citationCount', 0)} | References: {paper.get('referenceCount', 0)}")
    if paper.get("tldr"):
        print(f"\nTLDR: {paper['tldr']['text']}")
    if paper.get("abstract"):
        # Truncate long abstracts so the console output stays readable.
        print(f"\nAbstract: {paper['abstract'][:300]}...")
    if paper.get("fieldsOfStudy"):
        print(f"\nFields: {', '.join(paper['fieldsOfStudy'])}")

# "Attention Is All You Need"
paper_details("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Enter fullscreen mode Exit fullscreen mode

Find Similar Papers (AI Recommendations)

def recommendations(paper_id, limit=5, timeout=30):
    """Print AI-recommended papers similar to the given paper.

    Args:
        paper_id: Semantic Scholar paper ID of the seed paper.
        limit: Maximum number of recommendations to print (default 5).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}",
        params={"limit": limit, "fields": "title,year,citationCount,tldr"},
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    print("Recommended papers:")
    for paper in resp.json().get("recommendedPapers", []):
        print(f"  [{paper.get('year')}] {paper['title']} ({paper.get('citationCount', 0)} cites)")
        if paper.get("tldr"):
            print(f"    TLDR: {paper['tldr']['text'][:150]}...")
        print()

recommendations("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Enter fullscreen mode Exit fullscreen mode

Author Profiles with h-index

def author_search(name, timeout=30):
    """Search authors by name and print h-index, paper/citation counts, affiliations.

    Args:
        name: Author name to search for.
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        "https://api.semanticscholar.org/graph/v1/author/search",
        params={
            "query": name,
            "fields": "name,hIndex,citationCount,paperCount,affiliations",
            "limit": 3,
        },
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    for author in resp.json().get("data", []):
        print(f"{author['name']}")
        print(f"  h-index: {author.get('hIndex', 'N/A')}")
        print(f"  Papers: {author.get('paperCount', 'N/A')}")
        print(f"  Citations: {author.get('citationCount', 'N/A')}")
        if author.get("affiliations"):
            print(f"  Affiliations: {', '.join(author['affiliations'])}")
        print()

author_search("Ilya Sutskever")
Enter fullscreen mode Exit fullscreen mode

Citation Intent (Why Papers Cite Each Other)

One of Semantic Scholar's unique features: it classifies WHY a paper cites another.

def citation_context(paper_id, limit=5, timeout=30):
    """Print citing papers with their citation intent and context snippets.

    Args:
        paper_id: Semantic Scholar paper ID whose citations to inspect.
        limit: Maximum number of citations to print (default 5).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/citations",
        params={"fields": "title,citationCount,contexts,intents", "limit": limit},
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    for cite in resp.json().get("data", []):
        paper = cite["citingPaper"]
        print(f"{paper.get('title', 'N/A')}")
        if cite.get("intents"):
            print(f"  Intent: {', '.join(cite['intents'])}")
        if cite.get("contexts"):
            # Only show the first context snippet, truncated for readability.
            print(f"  Context: {cite['contexts'][0][:150]}...")
        print()

citation_context("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Enter fullscreen mode Exit fullscreen mode

Comparison: Semantic Scholar vs Others

| Feature | Semantic Scholar | OpenAlex | Crossref |
|---|---|---|---|
| Papers | 200M+ | 250M+ | 140M+ |
| AI TLDRs | Yes | No | No |
| Recommendations | Yes | No | No |
| Citation Intent | Yes | No | No |
| API Key | Free (recommended) | Not needed | Not needed |
| Abstracts | Usually | Sometimes | Rarely |
| Open Access PDFs | Direct links | Via Unpaywall | No |
| Best For | AI/NLP research, recommendations | Discovery, metrics | DOI metadata |

Build a Complete Research Pipeline

Use all three together:

# 1. Discover papers (OpenAlex — broadest coverage)
# 2. Get AI summaries + recommendations (Semantic Scholar)
# 3. Get canonical metadata (Crossref)

def research_pipeline(query, timeout=30):
    """Discover papers via OpenAlex, enrich with Semantic Scholar TLDRs,
    then pull canonical metadata from Crossref.

    Args:
        query: Free-text search string for OpenAlex discovery.
        timeout: Per-request timeout in seconds for every call (default 30).
    """
    # Step 1: Find the most-cited matches (OpenAlex has the broadest coverage).
    discovery = requests.get(
        "https://api.openalex.org/works",
        params={"search": query, "sort": "cited_by_count:desc", "per_page": 3},
        timeout=timeout,
    ).json()

    for work in discovery.get("results", []):
        # OpenAlex returns doi=None for some works; guard before stripping the prefix.
        doi = (work.get("doi") or "").replace("https://doi.org/", "")
        if not doi:
            continue

        # Step 2: AI summary — best-effort, the paper may not be in Semantic Scholar.
        s2_resp = requests.get(
            f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}",
            params={"fields": "tldr,title"},
            headers=headers,
            timeout=timeout,
        )
        s2 = s2_resp.json() if s2_resp.ok else {}

        print(f"\n{s2.get('title', work['title'])}")
        if s2.get("tldr"):
            print(f"  AI Summary: {s2['tldr']['text']}")

        # Step 3: Canonical metadata from Crossref.
        cr = requests.get(f"https://api.crossref.org/works/{doi}", timeout=timeout).json()
        if "message" in cr:
            # container-title is a list and may be empty for some record types.
            journal = cr["message"].get("container-title") or ["N/A"]
            print(f"  Journal: {journal[0]}")
            print(f"  Citations: {cr['message'].get('is-referenced-by-count', 0)}")

research_pipeline("transformer attention mechanism")
Enter fullscreen mode Exit fullscreen mode

What's your favorite tool for literature research? I'm building a collection of free academic APIs — PRs welcome!

I write practical API tutorials weekly. Follow for more.


More from me: 10 Dev Tools I Use Daily | 77 Scrapers on a Schedule | 150+ Free APIs

Top comments (0)