DEV Community

agenthustler
agenthustler

Posted on

Scraping App Stores: Google Play and Apple App Store Data

App store data is incredibly valuable for market research, competitive analysis, and ASO. Here's how to scrape both major app stores.

What Data Can You Extract?

  • App names, descriptions, and categories
  • Ratings and review counts
  • Download estimates
  • Update history and changelogs
  • Developer information

Google Play Store Scraping

pip install requests beautifulsoup4 playwright

Scraping App Details

import requests
from bs4 import BeautifulSoup
import time

class GooglePlayScraper:
    """Scrape app metadata from the Google Play web store.

    Google Play has no public JSON API, so this parses the HTML store
    pages directly.  A single ``requests.Session`` is reused so the
    browser-like headers and keep-alive connection persist across calls.
    """

    BASE_URL = "https://play.google.com"
    # Fail fast instead of hanging forever: requests has NO default timeout.
    TIMEOUT = 15

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                          "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9"
        })

    def get_app_details(self, app_id):
        """Fetch title, description and rating for one app.

        Args:
            app_id: Play Store package name, e.g. ``"com.spotify.music"``.

        Returns:
            dict with keys ``app_id``, ``title``, ``description`` and
            ``rating`` (``"N/A"`` when the rating element is absent).

        Raises:
            requests.HTTPError: if the store returns a non-2xx status.
        """
        url = f"{self.BASE_URL}/store/apps/details"
        # Let requests build/encode the query string rather than
        # interpolating app_id into the URL by hand.
        response = self.session.get(
            url, params={"id": app_id, "hl": "en"}, timeout=self.TIMEOUT
        )
        # Without this, a 404/429 error page would be parsed as app data.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.find("meta", property="og:title")
        desc = soup.find("meta", property="og:description")
        # NOTE(review): "TT9eCd" is an obfuscated, Google-generated CSS class
        # and will silently break when the page is redeployed — re-verify
        # this selector periodically.
        rating_el = soup.find("div", class_="TT9eCd")
        return {
            "app_id": app_id,
            "title": title["content"] if title else "",
            "description": desc["content"] if desc else "",
            "rating": rating_el.text if rating_el else "N/A"
        }

    def search_apps(self, query, num_results=20):
        """Search the store and return up to ``num_results`` unique apps.

        Args:
            query: free-text search term (any characters — it is URL-encoded).
            num_results: maximum number of results to return.

        Returns:
            list of dicts with ``app_id`` and ``title`` keys, in page order.

        Raises:
            requests.HTTPError: if the store returns a non-2xx status.
        """
        # Bug fix vs. the naive version: the query was previously pasted raw
        # into the URL, so spaces, '&' or non-ASCII characters broke the
        # request.  params= percent-encodes it correctly.
        response = self.session.get(
            f"{self.BASE_URL}/store/search",
            params={"q": query, "c": "apps", "hl": "en"},
            timeout=self.TIMEOUT,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        apps = []
        seen = set()  # the page links each app several times; dedupe by id
        for link in soup.find_all("a", href=True):
            href = link.get("href", "")
            if "/store/apps/details?id=" in href:
                app_id = href.split("id=")[1].split("&")[0]
                if app_id not in seen:
                    seen.add(app_id)
                    title_el = link.find("span")
                    apps.append({
                        "app_id": app_id,
                        # Fall back to the id when no visible title is found.
                        "title": title_el.text if title_el else app_id
                    })
        return apps[:num_results]

Apple App Store Scraping

Apple provides iTunes lookup and search APIs:

class AppStoreScraper:
    """Query Apple's public iTunes lookup/search APIs for App Store data.

    Unlike Google Play, Apple exposes documented JSON endpoints, so no
    HTML parsing is required.
    """

    LOOKUP_URL = "https://itunes.apple.com/lookup"
    SEARCH_URL = "https://itunes.apple.com/search"
    # requests has NO default timeout; without one a stalled server
    # blocks the caller indefinitely.
    TIMEOUT = 15

    def lookup_app(self, app_id):
        """Look up a single app by its numeric iTunes track id.

        Args:
            app_id: numeric App Store id (e.g. 324684580 for Spotify).

        Returns:
            dict of core metadata, or ``None`` when the id matches nothing.

        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        params = {"id": app_id, "country": "us"}
        response = requests.get(self.LOOKUP_URL, params=params, timeout=self.TIMEOUT)
        # Check the status before .json() so an HTML error page raises a
        # clear HTTPError instead of an opaque JSONDecodeError.
        response.raise_for_status()
        data = response.json()
        if data["resultCount"] > 0:
            app = data["results"][0]
            return {
                "name": app["trackName"],
                "developer": app["artistName"],
                "price": app.get("formattedPrice", "Free"),
                "rating": app.get("averageUserRating", 0),
                "rating_count": app.get("userRatingCount", 0),
                "category": app.get("primaryGenreName", ""),
                "version": app.get("version", "")
            }
        return None

    def search_apps(self, term, country="us", limit=25):
        """Search the App Store for ``term``.

        Args:
            term: free-text search term.
            country: two-letter storefront code (default ``"us"``).
            limit: maximum results to request (the API caps this — Apple
                documents a maximum of 200).

        Returns:
            list of dicts with id/name/developer/rating/price per match.

        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        params = {"term": term, "country": country, "media": "software", "limit": limit}
        response = requests.get(self.SEARCH_URL, params=params, timeout=self.TIMEOUT)
        response.raise_for_status()
        return [{
            "id": app["trackId"],
            "name": app["trackName"],
            "developer": app["artistName"],
            "rating": app.get("averageUserRating", 0),
            "price": app.get("formattedPrice", "Free")
        } for app in response.json().get("results", [])]

    def get_reviews(self, app_id, country="us", page=1):
        """Fetch one page of customer reviews from the public RSS feed.

        Args:
            app_id: numeric App Store id.
            country: two-letter storefront code.
            page: 1-based page of the most-recent-first review feed.

        Returns:
            list of dicts with title/content/rating/author per review.

        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        url = (f"https://itunes.apple.com/{country}/rss/"
               f"customerreviews/id={app_id}/sortBy=mostRecent/page={page}/json")
        response = requests.get(url, timeout=self.TIMEOUT)
        response.raise_for_status()
        entries = response.json().get("feed", {}).get("entry", [])
        # The feed's first entry is app metadata, not a review; filtering on
        # the "content" key skips it.
        return [{
            "title": e.get("title", {}).get("label", ""),
            "content": e.get("content", {}).get("label", ""),
            "rating": e.get("im:rating", {}).get("label", ""),
            "author": e.get("author", {}).get("name", {}).get("label", "")
        } for e in entries if "content" in e]

Market Intelligence Tool

class AppMarketAnalyzer:
    """Thin facade that runs the Google Play and App Store scrapers side by side."""

    def __init__(self):
        self.google = GooglePlayScraper()
        self.apple = AppStoreScraper()

    def competitive_analysis(self, search_term):
        """Return the top ten matches from each store for *search_term*."""
        return {
            "google_play": self.google.search_apps(search_term)[:10],
            "app_store": self.apple.search_apps(search_term)[:10],
            "query": search_term,
        }

    def track_app(self, google_id=None, apple_id=None):
        """Snapshot current details for the given store ids (either optional)."""
        snapshot = {"timestamp": time.time()}
        if google_id:
            snapshot["google"] = self.google.get_app_details(google_id)
        if apple_id:
            snapshot["apple"] = self.apple.lookup_app(apple_id)
        return snapshot

Scaling with Proxies

For large-scale monitoring, ScraperAPI handles Google Play's anti-bot measures. ThorData provides residential IPs for both stores. Track scrapers with ScrapeOps.

Conclusion

App store scraping unlocks powerful market intelligence. Apple's iTunes API makes it straightforward, while Google Play requires more creative approaches. Combine data from both stores for complete market coverage, and cache results to minimize your footprint.

Top comments (0)