Patent citation analysis reveals technological lineage. Automated prior art search saves thousands in legal fees.
Using the USPTO PatentsView API (note: the legacy api.patentsview.org endpoint has been deprecated in favor of search.patentsview.org — verify availability before deploying).
import requests, time
from collections import defaultdict
class PatentScraper:
    """Thin client for the USPTO PatentsView patent query API.

    Wraps a persistent ``requests.Session`` and exposes full-text search
    plus single-patent detail lookup.
    """

    # NOTE(review): the legacy api.patentsview.org endpoint has been
    # deprecated in favor of search.patentsview.org — confirm it still responds.
    API_URL = "https://api.patentsview.org/patents/query"

    def __init__(self):
        self.s = requests.Session()
        self.s.headers["User-Agent"] = "PatentResearch/1.0"

    def _query(self, payload):
        """POST *payload* to the query endpoint.

        Returns the ``"patents"`` list from the JSON response, or ``[]``
        on any non-200 status.  A timeout is set so a stalled connection
        cannot hang the caller indefinitely (the original code had none).
        """
        r = self.s.post(self.API_URL, json=payload, timeout=30)
        return r.json().get("patents", []) if r.status_code == 200 else []

    def search(self, query, n=50):
        """Full-text search of patent abstracts for *query*, newest first.

        Returns up to *n* patent dicts (``[]`` on failure).
        """
        return self._query({
            "q": {"_text_any": {"patent_abstract": query}},
            "f": ["patent_number", "patent_title", "patent_date", "patent_abstract",
                  "assignee_organization", "cited_patent_number", "citedby_patent_number"],
            "o": {"page": 1, "per_page": n},
            "s": [{"patent_date": "desc"}],
        })

    def details(self, pn):
        """Fetch a single patent by number *pn*.

        Returns the patent dict, or ``None`` when nothing matches.
        """
        ps = self._query({
            "q": {"patent_number": pn},
            "f": ["patent_number", "patent_title", "patent_date", "patent_abstract",
                  "cited_patent_number", "citedby_patent_number", "assignee_organization"],
        })
        return ps[0] if ps else None
Citation chain builder — recursively walks the cited-patent graph out to a fixed depth, collecting nodes and directed edges.
def chain(self, pn, depth=2):
    """Build a citation graph rooted at patent *pn*.

    Performs a depth-first walk over cited patents, following at most
    the first 10 citations per patent and at most *depth* hops from the
    root.  Returns ``{"nodes": [...], "edges": [...]}`` where each node
    carries ``id``/``title``/``assignee`` and each edge ``from``/``to``.
    """
    seen = set()
    graph = {"nodes": [], "edges": []}

    def walk(number, level):
        # Skip already-visited patents and anything past the depth limit.
        if number in seen or level > depth:
            return
        seen.add(number)
        record = self.details(number)
        if not record:
            return
        orgs = record.get("assignees", [{}])
        owner = orgs[0].get("assignee_organization", "?") if orgs else "?"
        graph["nodes"].append({
            "id": number,
            "title": record.get("patent_title", ""),
            "assignee": owner,
        })
        # Edges are recorded even for targets we never expand (depth cap),
        # matching the original behavior.
        for cited in record.get("cited_patents", [])[:10]:
            target = cited.get("cited_patent_number")
            if not target:
                continue
            graph["edges"].append({"from": number, "to": target})
            time.sleep(0.3)  # polite rate limit between API hits
            walk(target, level + 1)

    walk(pn, 0)
    return graph
def prior_art(self, desc, before=None):
    """Rank search hits for *desc* by naive keyword overlap.

    Searches abstracts for *desc*, optionally drops patents dated on or
    after *before* (ISO date string — plain string comparison works for
    ISO dates), and scores each hit as the percentage of the query's
    words found in its abstract.  Returns up to 15 results, most
    relevant first.
    """
    keywords = set(desc.lower().split())
    scored = []
    for hit in self.search(desc, 30):
        date = hit.get("patent_date", "9999")
        if before and date >= before:
            continue
        abstract_words = set(hit.get("patent_abstract", "").lower().split())
        overlap = len(keywords & abstract_words) / len(keywords) * 100 if keywords else 0
        scored.append({
            "number": hit["patent_number"],
            "title": hit["patent_title"],
            "date": date,
            "relevance": overlap,
        })
    scored.sort(key=lambda item: item["relevance"], reverse=True)
    return scored[:15]
# Demo: build a 1-hop citation chain, then run a prior-art search.
# Guarded so importing this module does not fire live API requests.
if __name__ == "__main__":
    s = PatentScraper()
    c = s.chain("11023456", 1)
    print(f"Nodes:{len(c['nodes'])} Edges:{len(c['edges'])}")
    for p in s.prior_art("machine learning image classification")[:5]:
        print(f" [{p['relevance']:.0f}%] {p['number']} - {p['title']}")
Scaling up
For larger crawls: ScraperAPI can proxy Google Patents requests, ThorData covers international patent offices, and ScrapeOps provides request monitoring and alerting.
Practical uses
Typical applications include due diligence, competitive intelligence, technology forecasting, and surfacing licensing opportunities.
Top comments (0)