DEV Community

Alex Spinov
Alex Spinov

Posted on

Semantic Scholar API: Search 200M+ Papers with AI-Powered Recommendations (Free Key)

Why Semantic Scholar?

While OpenAlex and Crossref give you raw metadata, Semantic Scholar adds something they can't: AI-powered paper recommendations and TLDR summaries generated by their S2 models.

Built by the Allen Institute for AI, it indexes 200M+ papers and provides:

  • AI-generated TLDRs for papers
  • Citation intent classification (is this paper supporting, contrasting, or just mentioning?)
  • Paper recommendations ("papers like this one")
  • Author disambiguation
  • Free API key with 100 requests per 5 minutes

Get Your Free API Key

  1. Go to semanticscholar.org/product/api
  2. Sign up (free)
  3. Get your API key

Or use without a key (lower rate limits).

Search Papers

import requests

API_KEY = "your-key-here"  # Optional but recommended
# Send the x-api-key header only when a real key has been configured;
# keyless requests still work, just with a lower rate limit.
headers = {"x-api-key": API_KEY} if API_KEY != "your-key-here" else {}

def search(query, limit=5, timeout=30):
    """Search Semantic Scholar and print title, citations, TLDR, PDF, authors.

    Args:
        query: Free-text search string.
        limit: Maximum number of results to print (default 5).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": limit,
            "fields": "title,year,citationCount,tldr,openAccessPdf,authors",
        },
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    for paper in resp.json().get("data", []):
        print(f"[{paper.get('year')}] {paper['title']}")
        print(f"  Citations: {paper.get('citationCount', 0)}")
        if paper.get("tldr"):
            print(f"  TLDR: {paper['tldr']['text']}")
        if paper.get("openAccessPdf"):
            print(f"  PDF: {paper['openAccessPdf']['url']}")
        # Show at most the first three authors to keep the output compact.
        authors = ", ".join(a["name"] for a in paper.get("authors", [])[:3])
        print(f"  Authors: {authors}")
        print()

search("retrieval augmented generation")
Enter fullscreen mode Exit fullscreen mode

Get AI-Generated Paper Summaries

def paper_details(paper_id, timeout=30):
    """Print full details for one paper: TLDR, abstract, counts, fields of study.

    Args:
        paper_id: Semantic Scholar paper ID (also accepts DOI:/ARXIV: prefixes).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the paper is not found or the API errors.
    """
    fields = "title,year,abstract,tldr,citationCount,referenceCount,openAccessPdf,fieldsOfStudy"
    resp = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}",
        params={"fields": fields},
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # surface 404s instead of printing an error body
    paper = resp.json()

    print(f"Title: {paper['title']}")
    print(f"Year: {paper.get('year')}")
    print(f"Citations: {paper.get('citationCount', 0)} | References: {paper.get('referenceCount', 0)}")
    if paper.get("tldr"):
        print(f"\nTLDR: {paper['tldr']['text']}")
    if paper.get("abstract"):
        # Truncate long abstracts so the console output stays readable.
        print(f"\nAbstract: {paper['abstract'][:300]}...")
    if paper.get("fieldsOfStudy"):
        print(f"\nFields: {', '.join(paper['fieldsOfStudy'])}")

# "Attention Is All You Need"
paper_details("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Enter fullscreen mode Exit fullscreen mode

Find Similar Papers (AI Recommendations)

def recommendations(paper_id, limit=5, timeout=30):
    """Print AI-recommended papers similar to the given paper.

    Args:
        paper_id: Semantic Scholar paper ID of the seed paper.
        limit: Maximum number of recommendations to print (default 5).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}",
        params={"limit": limit, "fields": "title,year,citationCount,tldr"},
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    print("Recommended papers:")
    for paper in resp.json().get("recommendedPapers", []):
        print(f"  [{paper.get('year')}] {paper['title']} ({paper.get('citationCount', 0)} cites)")
        if paper.get("tldr"):
            print(f"    TLDR: {paper['tldr']['text'][:150]}...")
        print()

recommendations("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Enter fullscreen mode Exit fullscreen mode

Author Profiles with h-index

def author_search(name, timeout=30):
    """Search authors by name and print h-index, paper/citation counts, affiliations.

    Args:
        name: Author name to search for.
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        "https://api.semanticscholar.org/graph/v1/author/search",
        params={
            "query": name,
            "fields": "name,hIndex,citationCount,paperCount,affiliations",
            "limit": 3,
        },
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    for author in resp.json().get("data", []):
        print(f"{author['name']}")
        print(f"  h-index: {author.get('hIndex', 'N/A')}")
        print(f"  Papers: {author.get('paperCount', 'N/A')}")
        print(f"  Citations: {author.get('citationCount', 'N/A')}")
        if author.get("affiliations"):
            print(f"  Affiliations: {', '.join(author['affiliations'])}")
        print()

author_search("Ilya Sutskever")
Enter fullscreen mode Exit fullscreen mode

Citation Intent (Why Papers Cite Each Other)

One of Semantic Scholar's unique features: it classifies WHY a paper cites another.

def citation_context(paper_id, limit=5, timeout=30):
    """Print citing papers with their citation intent and context snippets.

    Args:
        paper_id: Semantic Scholar paper ID whose citations to inspect.
        limit: Maximum number of citations to print (default 5).
        timeout: Per-request timeout in seconds (default 30).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    resp = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/citations",
        params={"fields": "title,citationCount,contexts,intents", "limit": limit},
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()  # fail loudly instead of parsing an error page as JSON

    for cite in resp.json().get("data", []):
        paper = cite["citingPaper"]
        print(f"{paper.get('title', 'N/A')}")
        if cite.get("intents"):
            print(f"  Intent: {', '.join(cite['intents'])}")
        if cite.get("contexts"):
            # Only show the first context snippet, truncated for readability.
            print(f"  Context: {cite['contexts'][0][:150]}...")
        print()

citation_context("204e3073870fae3d05bcbc2f6a8e263d9b72e776")
Enter fullscreen mode Exit fullscreen mode

Comparison: Semantic Scholar vs Others

| Feature | Semantic Scholar | OpenAlex | Crossref |
|---|---|---|---|
| Papers | 200M+ | 250M+ | 140M+ |
| AI TLDRs | Yes | No | No |
| Recommendations | Yes | No | No |
| Citation Intent | Yes | No | No |
| API Key | Free (recommended) | Not needed | Not needed |
| Abstracts | Usually | Sometimes | Rarely |
| Open Access PDFs | Direct links | Via Unpaywall | No |
| Best For | AI/NLP research, recommendations | Discovery, metrics | DOI metadata |

Build a Complete Research Pipeline

Use all three together:

# 1. Discover papers (OpenAlex — broadest coverage)
# 2. Get AI summaries + recommendations (Semantic Scholar)
# 3. Get canonical metadata (Crossref)

def research_pipeline(query, timeout=30):
    """Discover papers via OpenAlex, enrich with Semantic Scholar TLDRs,
    then pull canonical metadata from Crossref.

    Args:
        query: Free-text search string for OpenAlex discovery.
        timeout: Per-request timeout in seconds for every call (default 30).
    """
    # Step 1: Find the most-cited matches (OpenAlex has the broadest coverage).
    discovery = requests.get(
        "https://api.openalex.org/works",
        params={"search": query, "sort": "cited_by_count:desc", "per_page": 3},
        timeout=timeout,
    ).json()

    for work in discovery.get("results", []):
        # OpenAlex returns doi=None for some works; guard before stripping the prefix.
        doi = (work.get("doi") or "").replace("https://doi.org/", "")
        if not doi:
            continue

        # Step 2: AI summary — best-effort, the paper may not be in Semantic Scholar.
        s2_resp = requests.get(
            f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}",
            params={"fields": "tldr,title"},
            headers=headers,
            timeout=timeout,
        )
        s2 = s2_resp.json() if s2_resp.ok else {}

        print(f"\n{s2.get('title', work['title'])}")
        if s2.get("tldr"):
            print(f"  AI Summary: {s2['tldr']['text']}")

        # Step 3: Canonical metadata from Crossref.
        cr = requests.get(f"https://api.crossref.org/works/{doi}", timeout=timeout).json()
        if "message" in cr:
            # container-title is a list and may be empty for some record types.
            journal = cr["message"].get("container-title") or ["N/A"]
            print(f"  Journal: {journal[0]}")
            print(f"  Citations: {cr['message'].get('is-referenced-by-count', 0)}")

research_pipeline("transformer attention mechanism")
Enter fullscreen mode Exit fullscreen mode

What's your favorite tool for literature research? I'm building a collection of free academic APIs — PRs welcome!

I write practical API tutorials weekly. Follow for more.


More from me: 10 Dev Tools I Use Daily | 77 Scrapers on a Schedule | 150+ Free APIs

Top comments (0)