Google Search results — also called SERP (Search Engine Results Pages) — are one of the most valuable data sources for SEO research, rank tracking, competitor analysis, and market research. In this guide, I'll show you how to collect SERP data with Python.
Why Scrape Google SERPs?
- Rank tracking: Monitor where your site ranks for target keywords
- Competitor analysis: See who's ranking above you and what content they're using
- Keyword research: Discover related searches, People Also Ask, and featured snippets
- Ad intelligence: Track paid ad placements and copy
- Local SEO: Monitor local pack results across different locations
The Challenge with Google
Google is one of the hardest sites to scrape. They use sophisticated bot detection including CAPTCHAs, rate limiting, IP blocking, and behavioral analysis. Direct scraping is unreliable for production use.
Method 1: Using a SERP API
The most reliable approach for production is using a dedicated SERP API:
import requests
from urllib.parse import quote_plus


def search_google(query: str, num_results: int = 10) -> list[dict]:
    """Fetch a Google SERP for *query* via ScraperAPI and return parsed results.

    Args:
        query: Search phrase; URL-encoded before being placed in the target URL.
        num_results: Number of results to request (Google's ``num`` parameter).

    Returns:
        Parsed result dicts from ``parse_serp_html``, or ``[]`` on any
        non-200 API response.
    """
    params = {
        "api_key": "YOUR_API_KEY",
        # quote_plus prevents spaces, '&', '#' etc. in the query from
        # breaking or truncating the target URL.
        "url": f"https://www.google.com/search?q={quote_plus(query)}&num={num_results}",
        "render": "true",
    }
    # Explicit timeout: requests otherwise waits indefinitely on a stalled server.
    response = requests.get("https://api.scraperapi.com", params=params, timeout=60)
    if response.status_code == 200:
        return parse_serp_html(response.text)
    return []
Get 5,000 free SERP API credits with ScraperAPI
Method 2: Parsing SERP HTML
Once you have the HTML (via API or otherwise), parse it with BeautifulSoup:
from bs4 import BeautifulSoup


def parse_serp_html(html: str) -> list[dict]:
    """Extract organic results from raw SERP HTML.

    Each result dict carries ``title``, ``url``, ``snippet`` and a
    1-based ``position`` reflecting its order on the page.
    """
    soup = BeautifulSoup(html, "html.parser")
    parsed: list[dict] = []
    for container in soup.select("div.g"):
        heading = container.select_one("h3")
        anchor = container.select_one("a[href]")
        if not (heading and anchor):
            continue  # not a real result container; skip it
        summary = container.select_one("div.VwiC3b")
        parsed.append(
            {
                "title": heading.get_text(),
                "url": anchor["href"],
                "snippet": summary.get_text() if summary else "",
                "position": len(parsed) + 1,  # rank among kept results
            }
        )
    return parsed
# Usage: print every organic result as a ranked list
results = parse_serp_html(html_content)
for hit in results:
    print(f"#{hit['position']} {hit['title']}")
    print(f" {hit['url']}")
    print(f" {hit['snippet']}\n")
Method 3: Extracting Rich SERP Features
Modern SERPs include much more than ten blue links:
def parse_rich_serp(html: str) -> dict:
    """Parse a SERP page into organic results plus rich features.

    Returns a dict with keys ``organic``, ``people_also_ask``,
    ``related_searches``, ``featured_snippet`` and ``knowledge_panel``.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Organic results: keep only containers with both a heading and a link.
    organic = []
    for container in soup.select("div.g"):
        heading = container.select_one("h3")
        anchor = container.select_one("a[href]")
        if heading and anchor:
            organic.append({"title": heading.get_text(), "url": anchor["href"]})

    # People Also Ask: the question text lives in the first <span> of each pair.
    questions = [
        q.get_text()
        for pair in soup.select("div.related-question-pair")
        if (q := pair.select_one("span"))
    ]

    # Related searches at the bottom of the page.
    related = [link.get_text() for link in soup.select("div.s75CSd a")]

    # Featured snippet, truncated to keep the payload small.
    featured = None
    snippet_block = soup.select_one("div.xpdopen")
    if snippet_block:
        featured = snippet_block.get_text(strip=True)[:500]

    return {
        "organic": organic,
        "people_also_ask": questions,
        "related_searches": related,
        "featured_snippet": featured,
        "knowledge_panel": None,  # not extracted here; selector varies by panel type
    }
Building a Rank Tracker
Here's a practical rank tracking script that monitors your positions over time:
import json
import csv
from datetime import datetime
from pathlib import Path
class RankTracker:
def __init__(self, domain: str, keywords: list[str]):
self.domain = domain
self.keywords = keywords
self.history_file = Path(f"rank_history_{domain.replace('.', '_')}.csv")
def check_ranking(self, keyword: str) -> int | None:
"""Check where domain ranks for a keyword."""
results = search_google(keyword, num_results=100)
for result in results:
if self.domain in result.get("url", ""):
return result["position"]
return None # Not in top 100
def run_check(self):
"""Check all keywords and save results."""
timestamp = datetime.now().isoformat()
results = []
for keyword in self.keywords:
position = self.check_ranking(keyword)
results.append({
"date": timestamp,
"keyword": keyword,
"position": position if position else "100+",
})
print(f" {keyword}: #{position or '100+'}")
self.save_results(results)
return results
def save_results(self, results: list[dict]):
file_exists = self.history_file.exists()
with open(self.history_file, "a", newline="") as f:
writer = csv.DictWriter(f, fieldnames=["date", "keyword", "position"])
if not file_exists:
writer.writeheader()
writer.writerows(results)
# Usage: track a handful of keywords for one domain
target_keywords = [
    "python web scraping",
    "scrape google results",
    "serp tracker python",
]
tracker = RankTracker(domain="mysite.com", keywords=target_keywords)
tracker.run_check()
Handling Multiple Locations
For local SEO, you need SERP data from different geographic locations:
def search_local(query: str, location: str) -> list[dict]:
    """Search Google as if located in *location* (for local-SEO checks).

    Args:
        query: Search phrase; URL-encoded before being placed in the target URL.
        location: Human-readable place name. Passed to ``encode_location``,
            which must return a value for Google's ``uule`` parameter —
            NOTE(review): ``encode_location`` is not defined in this guide;
            it must be supplied elsewhere.

    Returns:
        Parsed organic results from ``parse_serp_html``.

    Raises:
        requests.HTTPError: If the API responds with an error status
            (previously the error page was silently parsed as a SERP).
    """
    from urllib.parse import quote_plus  # stdlib; local import keeps the snippet self-contained

    params = {
        "api_key": "YOUR_API_KEY",
        # quote_plus prevents spaces and URL metacharacters in the query
        # from corrupting the target URL.
        "url": (
            f"https://www.google.com/search?q={quote_plus(query)}"
            f"&gl=us&uule={encode_location(location)}"
        ),
        "render": "true",
    }
    # Timeout so a stalled request can't hang the caller forever.
    response = requests.get("https://api.scraperapi.com", params=params, timeout=60)
    response.raise_for_status()
    return parse_serp_html(response.text)
# Compare local rankings for the same query across several US cities
cities = ["New York", "Los Angeles", "Chicago", "Houston"]
for city in cities:
    city_results = search_local("best pizza near me", city)
    print(f"\n{city}:")
    # Only the top five matter for a quick comparison
    for hit in city_results[:5]:
        print(f" #{hit['position']} {hit['title']}")
Best Practices
- Use a SERP API for production — direct scraping is too unreliable
- Cache results — don't re-scrape the same query within 24 hours
- Respect rate limits — even with APIs, pace your requests
- Store historical data — rank trends are more valuable than snapshots
- Monitor multiple search features — featured snippets and PAA matter as much as organic rank
Conclusion
SERP scraping is essential for SEO professionals and marketers. While Google's defenses make direct scraping impractical, using a reliable API like ScraperAPI gives you consistent, structured SERP data for rank tracking, competitor analysis, and keyword research.
Happy scraping!
Top comments (0)