Scraping Google Maps: Extracting Business Data at Scale

Why Google Maps Data Is Valuable

Google Maps contains the most comprehensive database of local businesses on the planet. For lead generation, market research, and competitive analysis, this data is gold. Names, addresses, phone numbers, reviews, ratings, hours — all structured and queryable.

Let's build a scraper that extracts business data from Google Maps at scale.

The Challenge

Google Maps is a JavaScript-heavy single-page application. Traditional HTTP requests won't work — you need either browser automation or a specialized API proxy.
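
You can verify this with a quick sanity check: a plain GET returns the application shell (or a consent page, depending on your region), with none of the rendered business cards. This snippet is just a demonstration, not part of the scraper:

import requests

# Quick sanity check: fetch the search page without a browser.
resp = requests.get(
    "https://www.google.com/maps/search/coffee+shops",
    headers={"User-Agent": "Mozilla/5.0"},
)
print(resp.status_code, len(resp.text))
# You get the JS application shell (and often a consent page), but not the
# rendered business cards; those only exist after JavaScript executes.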

Approach 1: Using ScraperAPI with Google Maps

The simplest approach is to use ScraperAPI, which handles JavaScript rendering and anti-bot protection for you:

import requests
import csv
from urllib.parse import quote
from bs4 import BeautifulSoup
import time

class GoogleMapsScraper:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base = "https://api.scraperapi.com"

    def search_places(self, query, location=None):
        """Search Google Maps for businesses.

        location is an optional "lat,lng" string used to centre the map.
        """
        search_url = f"https://www.google.com/maps/search/{quote(query)}"
        if location:
            search_url += f"/@{location},14z"

        resp = requests.get(self.base, params={
            "api_key": self.api_key,
            "url": search_url,
            "render": "true",
            "wait_for_selector": ".Nv2PK"  # wait until result cards render
        })
        return self._parse_search_results(resp.text)

    def _parse_search_results(self, html):
        soup = BeautifulSoup(html, "html.parser")
        results = []

        for item in soup.select(".Nv2PK, [role=feed] > div"):
            name_el = item.select_one(".qBF1Pd, .fontHeadlineSmall")
            rating_el = item.select_one(".MW4etd")
            reviews_el = item.select_one(".UY7F9")
            category_el = item.select_one(".W4Efsd:nth-child(2) span:nth-child(1)")
            address_el = item.select_one(".W4Efsd:nth-child(2) span:nth-child(3)")
            # The card's anchor holds the place URL, which feeds get_place_details()
            link_el = item.select_one("a[href*='/maps/place/']")

            if name_el:
                results.append({
                    "name": name_el.get_text(strip=True),
                    "rating": float(rating_el.get_text()) if rating_el else None,
                    "review_count": reviews_el.get_text(strip=True).strip("()") if reviews_el else None,
                    "category": category_el.get_text(strip=True) if category_el else None,
                    "address": address_el.get_text(strip=True) if address_el else None,
                    "url": link_el.get("href") if link_el else None,
                })
        return results

    def get_place_details(self, place_url):
        """Get detailed information for a specific business."""
        resp = requests.get(self.base, params={
            "api_key": self.api_key,
            "url": place_url,
            "render": "true"
        })
        return self._parse_place_details(resp.text)

    def _parse_place_details(self, html):
        soup = BeautifulSoup(html, "html.parser")
        details = {}

        name = soup.select_one("h1.DUwDvf")
        details["name"] = name.get_text(strip=True) if name else None

        phone_btn = soup.select_one("[data-tooltip='Copy phone number']")
        if phone_btn:
            details["phone"] = phone_btn.get_text(strip=True)

        web_btn = soup.select_one("[data-tooltip='Open website']")
        if web_btn:
            details["website"] = web_btn.get("href")

        return details

    def export_csv(self, results, filename):
        if not results:
            return
        with open(filename, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=results[0].keys())
            writer.writeheader()
            writer.writerows(results)
        print(f"Exported {len(results)} results to {filename}")
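To tie it together, here is a minimal usage sketch; the API key, query, and coordinates are placeholder values:

# Hypothetical usage; replace the key and coordinates with your own.
scraper = GoogleMapsScraper(api_key="YOUR_SCRAPERAPI_KEY")

# "30.2672,-97.7431" centres the map on Austin, TX (illustrative values).
places = scraper.search_places("coffee shops", "30.2672,-97.7431")
scraper.export_csv(places, "austin_coffee_shops.csv")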

Approach 2: Playwright Browser Automation

For more control, use headless browser automation:

import asyncio
from urllib.parse import quote
from playwright.async_api import async_playwright

async def scrape_google_maps(query, max_results=100):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()

        # URL-encode the query so spaces and special characters survive
        await page.goto(
            f"https://www.google.com/maps/search/{quote(query)}",
            wait_until="networkidle"
        )

        # Results live in a scrollable feed panel; scroll it repeatedly
        # to trigger lazy loading of more listings.
        panel = page.locator('[role="feed"]')

        items = []
        for _ in range(10):
            await panel.evaluate("el => el.scrollTo(0, el.scrollHeight)")
            await asyncio.sleep(1.5)

            items = await page.locator(".Nv2PK").all()
            if len(items) >= max_results:
                break

        results = []
        for item in items[:max_results]:
            name = await item.locator(".qBF1Pd").text_content()
            results.append({"name": (name or "").strip()})

        await browser.close()
        return results
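Since the function is async, run it with asyncio.run (query and limit are illustrative):

# Illustrative entry point for the async scraper above.
if __name__ == "__main__":
    listings = asyncio.run(scrape_google_maps("coffee shops in Austin", max_results=50))
    print(f"Collected {len(listings)} listings")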

Scaling Up

To scrape thousands of businesses:

  1. Parallelize with asyncio — run multiple browser instances (see the sketch after the example below)
  2. Use proxy rotation — ThorData residential proxies prevent IP bans
  3. Queue cities/categories — use Redis or a simple file-based queue
  4. Deduplicate — Google Maps shows overlapping results for nearby areas
import asyncio

async def scrape_multiple_cities(scraper, cities, category):
    all_businesses = []
    for city in cities:
        query = f"{category} in {city}"
        # The city is already part of the query string, so no map-centre
        # coordinates are passed to search_places.
        results = scraper.search_places(query)
        all_businesses.extend(results)
        await asyncio.sleep(2)  # Stagger requests between cities

    # Deduplicate by name + address
    seen = set()
    unique = []
    for biz in all_businesses:
        key = (biz.get("name"), biz.get("address"))
        if key not in seen:
            seen.add(key)
            unique.append(biz)
    return unique
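The helper above runs cities sequentially. For the parallelism in point 1, here is a minimal sketch using asyncio.gather with a semaphore to cap concurrent browser sessions, reusing scrape_google_maps from Approach 2 (the concurrency limit is an arbitrary choice):

import asyncio

async def scrape_cities_parallel(cities, category, max_concurrency=3):
    # Cap simultaneous headless browsers to avoid exhausting memory
    # and drawing extra attention to your traffic.
    semaphore = asyncio.Semaphore(max_concurrency)

    async def scrape_one(city):
        async with semaphore:
            # Reuses the Playwright scraper from Approach 2.
            return await scrape_google_maps(f"{category} in {city}")

    batches = await asyncio.gather(*(scrape_one(city) for city in cities))
    # Flatten the per-city result lists into one list.
    return [biz for batch in batches for biz in batch]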

Monitoring and Quality Control

Track your scraping success rates with ScrapeOps. Google Maps changes its DOM structure frequently, so monitoring helps you detect breakages early.
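
Even a simple in-house check helps: flag any query that parses to zero results, since that is the usual symptom of a selector change rather than an empty market. A minimal sketch, where the threshold and the alerting hook are placeholders:

def check_parse_health(results, query, min_expected=1):
    """Flag likely selector breakage when a search returns too few results."""
    if len(results) < min_expected:
        # Replace with your real alerting (Slack webhook, email, etc.)
        print(f"WARNING: only {len(results)} results for '{query}', "
              "selectors may be stale")
        return False
    return True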

Ethical Considerations

  • Respect Google's rate limits
  • Do not scrape personal information beyond what is publicly listed
  • Use the data responsibly — do not enable spam calls or emails
  • Consider Google's Places API for smaller-scale needs

Conclusion

Google Maps scraping is one of the highest-ROI data collection projects. The business data you extract can power lead generation, market research, and location intelligence. Start with a proxy API for simplicity, and scale to browser automation when you need more control.
