SEO professionals spend hours manually checking rankings. Web scraping automates rank tracking, backlink analysis, and competitive intelligence.
SERP Scraping
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlencode, urlparse
class SERPScraper:
    """Scrape Google search results through the ScraperAPI proxy endpoint.

    NOTE(review): parsing relies on the `div.g` / `h3` structure of Google's
    classic desktop SERP markup as rendered by ScraperAPI — confirm the
    selectors still match current output before relying on positions.
    """

    def __init__(self, api_key='YOUR_KEY'):
        """Create a scraper.

        api_key: ScraperAPI key. Was previously hard-coded inside
            ``search``; now injectable (default keeps old behavior).
        """
        self.api_key = api_key
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})

    def search(self, query, num=10):
        """Return a list of organic results for ``query``.

        Each result is a dict with 'position', 'title', 'url' and 'domain'.
        Results missing a link or title element are skipped, so positions
        reflect only parsed entries.
        """
        # Bug fix: the query used to be interpolated raw into the URL, so
        # spaces, '&', '#', '+' etc. broke or truncated the search term.
        # urlencode percent-escapes both q and num safely.
        google_url = 'https://www.google.com/search?' + urlencode(
            {'q': query, 'num': num})
        resp = self.session.get('https://api.scraperapi.com', params={
            'api_key': self.api_key,
            'url': google_url,
            'render': 'true',
        })
        soup = BeautifulSoup(resp.text, 'html.parser')
        results = []
        for i, div in enumerate(soup.select('div.g'), 1):
            link = div.select_one('a')
            title = div.select_one('h3')
            if link and title:
                href = link.get('href', '')
                results.append({
                    'position': i,
                    'title': title.get_text(strip=True),
                    'url': href,
                    'domain': urlparse(href).netloc,
                })
        return results

    def track_position(self, keyword, domain):
        """Return the first SERP position (top 100) where ``domain`` appears.

        Matching is a substring test against the result's netloc, so
        subdomains of ``domain`` also match. 'position' is None when the
        domain is absent from the results.
        """
        for r in self.search(keyword, 100):
            if domain in r.get('domain', ''):
                return {'keyword': keyword, 'position': r['position']}
        return {'keyword': keyword, 'position': None}
Bulk Rank Tracking
import sqlite3
from datetime import datetime
import time
class RankTracker:
    """Persist keyword ranking snapshots for a domain to a SQLite database."""

    def __init__(self, db='ranks.db'):
        """Open (or create) the database and ensure the rankings table exists.

        db: path to the SQLite file; ':memory:' works for throwaway runs.
        """
        self.conn = sqlite3.connect(db)
        self.conn.execute('''CREATE TABLE IF NOT EXISTS rankings (
            id INTEGER PRIMARY KEY, keyword TEXT, domain TEXT,
            position INTEGER, checked_at TIMESTAMP)''')
        # Bug fix: the CREATE TABLE was never committed, so the schema could
        # be lost if the connection was abandoned without a later commit.
        self.conn.commit()
        self.scraper = SERPScraper()

    def track(self, keywords, domain, delay=5):
        """Look up and record the position of ``domain`` for each keyword.

        delay: seconds to pause between SERP requests (basic rate limiting).
        """
        for idx, kw in enumerate(keywords):
            # Bug fix: the original slept *after* every keyword, wasting
            # `delay` seconds once the final keyword was already recorded.
            if idx:
                time.sleep(delay)
            result = self.scraper.track_position(kw, domain)
            self.conn.execute(
                'INSERT INTO rankings VALUES (NULL,?,?,?,?)',
                (kw, domain, result['position'], datetime.now().isoformat()))
            self.conn.commit()
            print(f" {kw}: #{result['position'] or 'N/A'}")

    def close(self):
        """Release the SQLite connection (previously leaked — there was no
        way to close it)."""
        self.conn.close()
Backlink Analysis
import json
class BacklinkAnalyzer:
    """Discover pages linking within a domain via the free Common Crawl index."""

    # Generalized: the crawl snapshot was hard-coded into the URL; it is now
    # a parameter with the same default, so old callers see identical behavior.
    DEFAULT_INDEX = 'CC-MAIN-2026-09'

    def check_common_crawl(self, domain, index=None, limit=100):
        """Return up to ``limit`` captured URLs under ``*.domain``.

        index: Common Crawl snapshot name (defaults to DEFAULT_INDEX).
        Returns a list of {'source': url, 'status': http_status} dicts;
        non-JSON lines in the response are skipped.
        """
        index = index or self.DEFAULT_INDEX
        resp = requests.get(
            f'https://index.commoncrawl.org/{index}-index',
            params={'url': f'*.{domain}', 'output': 'json', 'limit': limit},
            # Robustness: the index server can be slow; don't hang forever.
            timeout=30)
        links = []
        for line in resp.text.strip().split('\n'):
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                # Bug fix: a bare `except: pass` also swallowed
                # KeyboardInterrupt/SystemExit; only tolerate bad JSON
                # (e.g. HTML error banners mixed into the response).
                continue
            links.append({'source': entry.get('url'),
                          'status': entry.get('status')})
        return links

    def find_opportunities(self, my_domain, competitors):
        """Report referring domains each competitor has that we lack.

        Prints one line per competitor and returns {competitor: count}.
        (The original returned None, so callers that ignore the return
        value are unaffected.)
        """
        my_links = {
            urlparse(l['source']).netloc
            for l in self.check_common_crawl(my_domain) if l.get('source')
        }
        gaps = {}
        for comp in competitors:
            comp_links = {
                urlparse(l['source']).netloc
                for l in self.check_common_crawl(comp) if l.get('source')
            }
            exclusive = comp_links - my_links
            gaps[comp] = len(exclusive)
            print(f"{comp}: {len(exclusive)} unique linking domains")
        return gaps
On-Page SEO Audit
class SEOAuditor:
    """Run a lightweight on-page SEO check against a single URL."""

    def audit(self, url):
        """Fetch ``url`` and return a dict of basic on-page SEO signals.

        Signals cover HTTP status, response time, title/meta lengths,
        heading and image counts, and presence of JSON-LD structured data.
        """
        response = requests.get(url, timeout=15)
        page = BeautifulSoup(response.text, 'html.parser')

        page_title = ''
        title_tag = page.find('title')
        if title_tag:
            page_title = title_tag.get_text(strip=True)

        description = ''
        description_tag = page.find('meta', attrs={'name': 'description'})
        if description_tag:
            description = description_tag.get('content', '')

        all_images = page.find_all('img')
        alt_missing = len([img for img in all_images if not img.get('alt')])

        report = {}
        report['status'] = response.status_code
        report['load_time'] = response.elapsed.total_seconds()
        report['title_len'] = len(page_title)
        # 30–60 characters is the target window checked for the title.
        report['title_ok'] = 30 <= len(page_title) <= 60
        report['meta_len'] = len(description)
        report['h1_count'] = len(page.find_all('h1'))
        report['images'] = len(all_images)
        report['missing_alt'] = alt_missing
        report['has_schema'] = bool(
            page.find('script', type='application/ld+json'))
        return report
Scaling
ScraperAPI is built for SERP scraping with auto proxy rotation. ThorData provides geo-targeted proxies, and ScrapeOps monitors SEO pipelines.
Conclusion
Build your own SEO tools for unlimited rank tracking and competitive intelligence without expensive subscriptions.
Top comments (0)