App store data is incredibly valuable for market research, competitive analysis, and App Store Optimization (ASO). Here's how to scrape both major app stores: Google Play and the Apple App Store.
What Data Can You Extract?
- App names, descriptions, and categories
- Ratings and review counts
- Download estimates
- Update history and changelogs
- Developer information
Google Play Store Scraping
pip install requests beautifulsoup4 playwright
Scraping App Details
import time
from urllib.parse import parse_qs, urlsplit

import requests
from bs4 import BeautifulSoup
class GooglePlayScraper:
    """Scrape public app metadata from Google Play web pages.

    All requests go through one ``requests.Session`` that carries a
    browser-like User-Agent and an English Accept-Language header.
    """

    BASE_URL = "https://play.google.com"
    # Seconds to wait for a response. Without an explicit timeout,
    # requests can block forever on a stalled connection.
    TIMEOUT = 10

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                          "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
        })

    @staticmethod
    def _app_id_from_href(href):
        """Return the package id from an app-details link, or None.

        The link is parsed as a URL rather than split on ``"id="`` so
        that parameter order, extra parameters and percent-encoding do
        not matter. Links whose path is not the details page, or whose
        ``id`` is missing or empty, yield None.
        """
        parts = urlsplit(href)
        if not parts.path.endswith("/store/apps/details"):
            return None
        ids = parse_qs(parts.query).get("id")
        return ids[0] if ids else None

    def get_app_details(self, app_id):
        """Fetch the details page for *app_id* and extract basic metadata.

        Args:
            app_id: Package id of the app, e.g. ``"com.example.app"``.

        Returns:
            A dict with keys ``app_id``, ``title``, ``description`` and
            ``rating``. Title and description come from the page's
            Open Graph meta tags and default to ``""``; rating defaults
            to ``"N/A"`` when the rating element is not found.
        """
        # Passing params lets requests URL-encode the id instead of
        # trusting it to be safe for raw interpolation.
        response = self.session.get(
            f"{self.BASE_URL}/store/apps/details",
            params={"id": app_id, "hl": "en"},
            timeout=self.TIMEOUT,
        )
        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.find("meta", property="og:title")
        desc = soup.find("meta", property="og:description")
        # NOTE(review): "TT9eCd" looks like a generated CSS class name
        # and may change without notice - confirm against the live page.
        rating_el = soup.find("div", class_="TT9eCd")
        return {
            "app_id": app_id,
            "title": title.get("content", "") if title else "",
            "description": desc.get("content", "") if desc else "",
            "rating": rating_el.text if rating_el else "N/A",
        }

    def search_apps(self, query, num_results=20):
        """Search Google Play for *query* and list the apps it links to.

        Args:
            query: Free-text search term; it is URL-encoded for you.
            num_results: Maximum number of apps to return.

        Returns:
            A list of ``{"app_id", "title"}`` dicts in page order with
            duplicates removed. The title falls back to the app id when
            the link contains no ``<span>``.
        """
        response = self.session.get(
            f"{self.BASE_URL}/store/search",
            params={"q": query, "c": "apps", "hl": "en"},
            timeout=self.TIMEOUT,
        )
        soup = BeautifulSoup(response.text, "html.parser")
        apps = []
        seen = set()
        for link in soup.find_all("a", href=True):
            app_id = self._app_id_from_href(link["href"])
            # The same app is usually linked more than once per page.
            if app_id is None or app_id in seen:
                continue
            seen.add(app_id)
            title_el = link.find("span")
            apps.append({
                "app_id": app_id,
                "title": title_el.text if title_el else app_id,
            })
        return apps[:num_results]
Apple App Store Scraping
Apple provides public iTunes Lookup and Search API endpoints that return JSON, so no HTML parsing is needed:
class AppStoreScraper:
    """Query Apple's iTunes lookup, search and customer-review endpoints."""

    LOOKUP_URL = "https://itunes.apple.com/lookup"
    SEARCH_URL = "https://itunes.apple.com/search"
    # Seconds to wait for a response. Without an explicit timeout,
    # requests can block forever on a stalled connection.
    TIMEOUT = 10

    def lookup_app(self, app_id, country="us"):
        """Look up a single app by its numeric App Store id.

        Args:
            app_id: The App Store track id.
            country: Two-letter storefront code; defaults to ``"us"``
                to match the other methods of this class.

        Returns:
            A dict with ``name``, ``developer``, ``price``, ``rating``,
            ``rating_count``, ``category`` and ``version``, or None
            when the lookup returns no results.
        """
        params = {"id": app_id, "country": country}
        response = requests.get(
            self.LOOKUP_URL, params=params, timeout=self.TIMEOUT
        )
        # Read the list itself rather than trusting "resultCount", so a
        # payload missing either key yields None instead of a KeyError.
        results = response.json().get("results") or []
        if not results:
            return None
        app = results[0]
        return {
            "name": app["trackName"],
            "developer": app["artistName"],
            "price": app.get("formattedPrice", "Free"),
            "rating": app.get("averageUserRating", 0),
            "rating_count": app.get("userRatingCount", 0),
            "category": app.get("primaryGenreName", ""),
            "version": app.get("version", ""),
        }

    def search_apps(self, term, country="us", limit=25):
        """Search the App Store for software matching *term*.

        Args:
            term: Free-text search term.
            country: Two-letter storefront code.
            limit: Maximum number of results requested from the API.

        Returns:
            A list of dicts with ``id``, ``name``, ``developer``,
            ``rating`` and ``price``; empty when nothing matches.
        """
        params = {
            "term": term,
            "country": country,
            "media": "software",
            "limit": limit,
        }
        response = requests.get(
            self.SEARCH_URL, params=params, timeout=self.TIMEOUT
        )
        return [
            {
                "id": app["trackId"],
                "name": app["trackName"],
                "developer": app["artistName"],
                "rating": app.get("averageUserRating", 0),
                "price": app.get("formattedPrice", "Free"),
            }
            for app in response.json().get("results", [])
        ]

    @staticmethod
    def _parse_reviews(payload):
        """Convert a decoded customer-reviews feed into review dicts.

        ``feed.entry`` may be a single object instead of a list, so it
        is normalised to a list first. Entries without a ``content``
        field are skipped.
        """
        entries = payload.get("feed", {}).get("entry", [])
        if isinstance(entries, dict):
            # Iterating a dict would walk its keys, not the review.
            entries = [entries]
        return [
            {
                "title": e.get("title", {}).get("label", ""),
                "content": e.get("content", {}).get("label", ""),
                "rating": e.get("im:rating", {}).get("label", ""),
                "author": e.get("author", {}).get("name", {}).get("label", ""),
            }
            for e in entries
            if "content" in e
        ]

    def get_reviews(self, app_id, country="us", page=1):
        """Fetch one page of the most recent customer reviews.

        Args:
            app_id: The App Store track id.
            country: Two-letter storefront code.
            page: Page number of the reviews feed to request.

        Returns:
            A list of dicts with ``title``, ``content``, ``rating`` and
            ``author``; empty when the feed has no entries.
        """
        url = (f"https://itunes.apple.com/{country}/rss/"
               f"customerreviews/id={app_id}/sortBy=mostRecent/page={page}/json")
        response = requests.get(url, timeout=self.TIMEOUT)
        return self._parse_reviews(response.json())
Market Intelligence Tool
class AppMarketAnalyzer:
    """Combine Google Play and App Store scrapers behind one interface."""

    def __init__(self):
        self.google = GooglePlayScraper()
        self.apple = AppStoreScraper()

    def competitive_analysis(self, search_term):
        """Search both stores for *search_term*.

        Returns:
            A dict holding up to ten hits per store under
            ``google_play`` and ``app_store``, plus the original term
            under ``query``.
        """
        play_hits = self.google.search_apps(search_term)
        ios_hits = self.apple.search_apps(search_term)
        report = {}
        report["google_play"] = play_hits[:10]
        report["app_store"] = ios_hits[:10]
        report["query"] = search_term
        return report

    def track_app(self, google_id=None, apple_id=None):
        """Take a timestamped snapshot of an app on either or both stores.

        A store is only queried when its id is truthy; the matching key
        (``google`` / ``apple``) is absent from the result otherwise.
        The timestamp is taken before any store is queried.
        """
        snapshot = {"timestamp": time.time()}
        if google_id:
            snapshot.update(google=self.google.get_app_details(google_id))
        if apple_id:
            snapshot.update(apple=self.apple.lookup_app(apple_id))
        return snapshot
Scaling with Proxies
For large-scale monitoring, ScraperAPI handles Google Play's anti-bot measures. ThorData provides residential IPs for both stores. Track scrapers with ScrapeOps.
Conclusion
App store scraping unlocks powerful market intelligence. Apple's iTunes API makes it straightforward, while Google Play has no comparable public API and requires parsing its HTML pages, which can break whenever the markup changes. Combine data from both stores for complete market coverage, and cache results to minimize your footprint.
Top comments (0)