Patent citation analysis reveals technological lineage, and automating prior art search can save thousands of dollars in legal fees.
USPTO PatentsView API
import requests, time
from collections import defaultdict
class PatentScraper:
    """Client for the USPTO PatentsView patent query API.

    Provides keyword search over abstracts, single-patent lookup,
    backward-citation chain traversal, and a crude word-overlap
    prior-art relevance ranking.
    """

    # Single query endpoint used by every method.
    _URL = "https://api.patentsview.org/patents/query"
    # Fields requested for both search() and details().
    _FIELDS = [
        "patent_number", "patent_title", "patent_date", "patent_abstract",
        "assignee_organization", "cited_patent_number", "citedby_patent_number",
    ]

    def __init__(self):
        self.s = requests.Session()
        self.s.headers["User-Agent"] = "PatentResearch/1.0"

    def _query(self, body):
        """POST *body* to the API and return the 'patents' list, or [] on failure.

        Failures handled: non-200 status, and a non-JSON response body
        (e.g. an HTML error page). Network-level exceptions propagate,
        matching the original behavior.
        """
        # timeout so a stalled connection cannot hang the caller forever
        r = self.s.post(self._URL, json=body, timeout=30)
        if r.status_code != 200:
            return []
        try:
            return r.json().get("patents", []) or []
        except ValueError:
            return []

    def search(self, query, n=50):
        """Full-text search of patent abstracts for *query*.

        Returns up to *n* patent records, newest first; [] on failure.
        """
        return self._query({
            "q": {"_text_any": {"patent_abstract": query}},
            "f": self._FIELDS,
            "o": {"page": 1, "per_page": n},
            "s": [{"patent_date": "desc"}],
        })

    def details(self, pn):
        """Return the full record for patent number *pn*, or None if not found."""
        ps = self._query({"q": {"patent_number": pn}, "f": self._FIELDS})
        return ps[0] if ps else None

    # --- Citation chain builder -------------------------------------------

    def chain(self, pn, depth=2):
        """Build a backward-citation graph rooted at patent *pn*.

        Follows at most 10 citations per patent, to *depth* levels.
        Returns {"nodes": [...], "edges": [...]}; each node carries the
        patent id, title, and first assignee organization ("?" if absent).
        """
        visited = set()
        result = {"nodes": [], "edges": []}

        def go(p, d):
            if p in visited or d > depth:
                return
            visited.add(p)
            det = self.details(p)
            if not det:
                return
            # NOTE(review): assignees are read from a nested "assignees" list,
            # but only flat fields are requested above — confirm API schema.
            asgn = det.get("assignees", [{}])
            result["nodes"].append({
                "id": p,
                "title": det.get("patent_title", ""),
                "assignee": asgn[0].get("assignee_organization", "?") if asgn else "?",
            })
            for c in det.get("cited_patents", [])[:10]:
                cpn = c.get("cited_patent_number")
                if cpn:
                    result["edges"].append({"from": p, "to": cpn})
                    time.sleep(0.3)  # polite per-request delay
                    go(cpn, d + 1)

        go(pn, 0)
        return result

    def prior_art(self, desc, before=None):
        """Rank search hits for *desc* by word-overlap relevance (0-100).

        before: optional ISO date string; only patents dated strictly
        earlier are kept (string comparison is valid for ISO dates).
        Returns at most 15 dicts, most relevant first.
        """
        qw = set(desc.lower().split())
        results = []
        for p in self.search(desc, 30):
            # guard against null dates/abstracts in sparse records
            pd = p.get("patent_date") or "9999"
            if before and pd >= before:
                continue
            aw = set((p.get("patent_abstract") or "").lower().split())
            rel = len(qw & aw) / len(qw) * 100 if qw else 0
            results.append({
                "number": p.get("patent_number", ""),
                "title": p.get("patent_title", ""),
                "date": pd,
                "relevance": rel,
            })
        return sorted(results, key=lambda x: x["relevance"], reverse=True)[:15]
if __name__ == "__main__":
    # Demo: shallow citation chain for one patent, then a quick
    # prior-art ranking. Guarded so importing this module performs
    # no network calls.
    s = PatentScraper()
    c = s.chain("11023456", 1)
    print(f"Nodes:{len(c['nodes'])} Edges:{len(c['edges'])}")
    for p in s.prior_art("machine learning image classification")[:5]:
        print(f" [{p['relevance']:.0f}%] {p['number']} - {p['title']}")
Scaling: use ScraperAPI for Google Patents, ThorData for international patent offices, and ScrapeOps for monitoring.
Use cases: due diligence, competitive intelligence, technology forecasting, and identifying licensing opportunities.
Skip the Build
You don't have to reinvent this. We maintain a production-grade scraper as an Apify actor — proxies, anti-bot, retries, and schema all handled. You can run it on a pay-per-result basis and get clean JSON without writing a single line of scraping code.
Top comments (0)