Scraping App Permission Data: Privacy Analysis at Scale
Mobile apps request permissions that reveal their true data collection practices. Let's build a Python pipeline that scrapes app permission data and analyzes privacy patterns at scale.
Data Sources
- Google Play Store — Detailed permission lists and data safety sections
- Exodus Privacy — Android tracker and permission database (API available)
- Apple App Store — Privacy nutrition labels
Setting Up
pip install requests beautifulsoup4 pandas matplotlib google-play-scraper
Google Play Store Permissions
from google_play_scraper import app, permissions
import pandas as pd
def get_app_permissions(app_id):
    """Collect Play Store metadata and permissions for one app.

    Args:
        app_id: Play Store package name, e.g. "com.whatsapp".

    Returns:
        A dict with the keys "app_id", "title", "developer", "installs",
        "score" and "permissions", or None when any step of the lookup
        raises (the error is printed instead of propagated).
    """
    try:
        info = app(app_id)
        perm_groups = permissions(app_id)
        record = dict(
            app_id=app_id,
            title=info.get("title", ""),
            developer=info.get("developer", ""),
            installs=info.get("realInstalls", 0),
            score=info.get("score", 0),
            permissions=perm_groups,
        )
        return record
    except Exception as err:
        # Best-effort: one failing app must not abort a whole batch run.
        print(f"Error fetching {app_id}: {err}")
        return None
# Messaging apps to compare, identified by their Play Store package names.
apps_to_check = [
    "com.whatsapp",
    "org.telegram.messenger",
    "com.discord",
    "com.Slack",
    "com.facebook.orca",
]

# Gather one record per app, skipping any app whose lookup failed.
results = []
for app_id in apps_to_check:
    data = get_app_permissions(app_id)
    if not data:
        continue
    results.append(data)
    print(f"{data['title']}: {len(data['permissions'])} permission groups")
Exodus Privacy API
import requests
# Base URL that the Exodus Privacy report lookups below are built on.
EXODUS_API = "https://reports.exodus-privacy.eu.org/api"
def get_exodus_report(app_id):
    """Fetch the tracker and permission report for an app from Exodus Privacy.

    Args:
        app_id: Android package name, e.g. "com.whatsapp".

    Returns:
        A dict with the keys "app_id", "trackers", "tracker_count",
        "permissions" and "permission_count", or None when the request
        fails, the response status is not 200, or the body is empty or
        not valid JSON.
    """
    # NOTE(review): the Exodus API may require an "Authorization: Token ..."
    # header; without it every call would return a non-200 status and this
    # function would quietly return None -- confirm against the API docs.
    try:
        # requests applies no timeout by default, so a stalled server would
        # otherwise block the whole pipeline indefinitely.
        resp = requests.get(f"{EXODUS_API}/search/{app_id}/details", timeout=30)
    except requests.RequestException as e:
        print(f"Error fetching {app_id}: {e}")
        return None

    if resp.status_code != 200:
        return None

    try:
        data = resp.json()
    except ValueError as e:
        # Raised when the body is not valid JSON (e.g. an HTML error page).
        print(f"Error fetching {app_id}: {e}")
        return None

    if not data:
        return None

    # The endpoint may answer with a list of reports; use the first entry.
    latest = data[0] if isinstance(data, list) else data
    trackers = latest.get("trackers", [])
    perms = latest.get("permissions", [])
    return {
        "app_id": app_id,
        "trackers": trackers,
        "tracker_count": len(trackers),
        "permissions": perms,
        "permission_count": len(perms),
    }
# Print a one-line tracker/permission summary for every app with a report.
for app_id in apps_to_check:
    report = get_exodus_report(app_id)
    if not report:
        continue
    print(
        f"{app_id}: {report['tracker_count']} trackers, "
        f"{report['permission_count']} permissions"
    )
Permission Risk Scoring
# Sensitive Android permissions; each value is the weight that permission
# adds to an app's risk score.
HIGH_RISK_PERMISSIONS = {
    # Personal data stores
    "android.permission.READ_CONTACTS": 8,
    # Location
    "android.permission.ACCESS_FINE_LOCATION": 9,
    # Capture hardware
    "android.permission.CAMERA": 7,
    "android.permission.RECORD_AUDIO": 8,
    # Telephony and messaging
    "android.permission.READ_PHONE_STATE": 6,
    "android.permission.READ_SMS": 9,
    "android.permission.READ_CALL_LOG": 8,
    # Sensors
    "android.permission.BODY_SENSORS": 7,
}


def calculate_privacy_score(permissions):
    """Score a list of permission names against HIGH_RISK_PERMISSIONS.

    Args:
        permissions: Sized iterable of permission names such as
            "android.permission.CAMERA".

    Returns:
        A dict with "risk_score" (sum of the weights of every listed
        permission found in HIGH_RISK_PERMISSIONS, duplicates counted each
        time), "flagged_permissions" (the short name after the last dot of
        each match, in input order) and "total_permissions" (the input
        length).
    """
    risky = [name for name in permissions if name in HIGH_RISK_PERMISSIONS]
    return {
        "risk_score": sum(HIGH_RISK_PERMISSIONS[name] for name in risky),
        "flagged_permissions": [name.rsplit(".", 1)[-1] for name in risky],
        "total_permissions": len(permissions),
    }
Comparative Analysis
import matplotlib.pyplot as plt
def compare_app_privacy(results):
    """Chart permission-group counts and privacy risk scores per app.

    Args:
        results: Records that each hold a "title" and a "permissions"
            entry. "permissions" may be either a mapping of group name to
            a list of permission strings, or a list of dicts that each
            carry a "permissions" list.

    Returns:
        None. The chart is written to "privacy_comparison.png". When
        there are no records a notice is printed and no file is written.
    """
    df = pd.DataFrame(results)
    if df.empty:
        # Without rows there is no "title" column to plot.
        print("No app data to compare.")
        return

    _, axes = plt.subplots(1, 2, figsize=(14, 6))

    axes[0].barh(df["title"], df["permissions"].apply(len))
    axes[0].set_xlabel("Number of Permission Groups")
    axes[0].set_title("Permission Requests by App")

    # NOTE(review): the Play Store scraper appears to return human-readable
    # permission descriptions rather than "android.permission.*" names; if
    # so, none will match the risk table and every score is 0 -- confirm.
    risk_scores = []
    for groups in df["permissions"]:
        if isinstance(groups, dict):
            # Mapping shape: iterating it directly would yield only the
            # group-name keys, so flatten the value lists instead.
            all_perms = [perm for perms in groups.values() for perm in perms]
        else:
            all_perms = [
                perm
                for group in groups
                for perm in group.get("permissions", [])
            ]
        risk_scores.append(calculate_privacy_score(all_perms)["risk_score"])

    # Green below 15, orange below 30, red otherwise.
    colors = [
        "#2ecc71" if s < 15 else "#f39c12" if s < 30 else "#e74c3c"
        for s in risk_scores
    ]
    axes[1].barh(df["title"], risk_scores, color=colors)
    axes[1].set_xlabel("Privacy Risk Score")
    axes[1].set_title("Privacy Risk Assessment")

    plt.tight_layout()
    plt.savefig("privacy_comparison.png", dpi=150)
# Draw and save the comparison chart for the collected app records.
compare_app_privacy(results)
Scraping iOS Privacy Labels
For Apple App Store privacy labels, use ScraperAPI with JS rendering:
def scrape_ios_privacy(app_url):
    """Scrape the privacy label cards from an App Store page via ScraperAPI.

    Args:
        app_url: URL of the App Store page to fetch.

    Returns:
        A list of dicts, one per privacy card that has a header, each with
        "category" (the header text) and "data_types" (the text of every
        list item in that card). Empty when no card matches.
    """
    # Imported locally: BeautifulSoup is used below, but no visible
    # top-level import provides it.
    from bs4 import BeautifulSoup

    params = {
        "api_key": "YOUR_SCRAPERAPI_KEY",
        "url": app_url,
        "render": "true",
    }
    # requests applies no timeout by default; the limit is generous because
    # rendered pages can take a while to come back.
    resp = requests.get("https://api.scraperapi.com", params=params, timeout=70)
    soup = BeautifulSoup(resp.text, "html.parser")

    labels = []
    for card in soup.select(".app-privacy__card"):
        category = card.select_one(".app-privacy__card-header")
        if category is None:
            # Cards without a header cannot be labelled; skip them.
            continue
        items = card.select(".app-privacy__list-item")
        labels.append({
            "category": category.get_text(strip=True),
            "data_types": [item.get_text(strip=True) for item in items],
        })
    return labels
Scale with ThorData proxies and monitor with ScrapeOps.
Key Takeaways
- Google Play Scraper and Exodus API provide structured permission data
- Risk scoring quantifies privacy invasiveness across apps
- Comparative analysis reveals which apps over-collect data
- Regular monitoring catches permission creep in updates
App store data is publicly visible. Use this analysis for privacy research and informed app choices.
Top comments (0)