How to Scrape Facebook Public Data Without the Graph API in 2026
Facebook's Graph API has been progressively restricted since 2018. Most useful endpoints now require app review, business verification, or return empty results for public pages. But the data itself is still publicly visible — here's how to access it.
What You Can Still Scrape (Legally, from Public Pages)
Facebook marks certain content as "public" — meaning anyone can see it without being logged in:
- Public Page posts — business pages, news organizations, public figures
- Public Group posts — groups with "Public" visibility setting
- Page info — name, category, about, contact info, follower count
- Events — public events from pages
- Marketplace listings — some regional availability
- Reviews — on business pages
What requires authentication (and is harder):
- Profile data (privacy settings usually restrict this)
- Private group content
- Ads Library data (has a dedicated API)
Method 1: Mobile API Endpoint (Most Reliable)
Facebook's mobile web app uses an internal API that's simpler to work with than the main site:
from curl_cffi import requests as cf_requests
import json, re, time, random
def scrape_facebook_page(page_id_or_name: str) -> dict:
    """
    Fetch a Facebook public page via the mobile web front-end.

    page_id_or_name: Facebook page username or numeric ID.
    Returns the parsed page dict, or {} when the fetch did not return 200.
    """
    mobile_headers = {
        "User-Agent": (
            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) "
            "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 "
            "Mobile/15E148 Safari/604.1"
        ),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
    }
    # m.facebook.com serves far less JavaScript than the desktop site
    page_url = f"https://m.facebook.com/{page_id_or_name}"
    session = cf_requests.Session()
    resp = session.get(page_url, impersonate="safari17_0", headers=mobile_headers)
    if resp.status_code == 200:
        return parse_facebook_mobile_page(resp.text)
    return {}
def parse_facebook_mobile_page(html: str) -> dict:
    """
    Parse the HTML of an m.facebook.com page into a dict.

    html: raw page HTML as returned by the mobile site.
    Returns a dict with (when present) 'name', 'description', and
    'posts' — a list of up to 10 {'text', 'story_id'} dicts.
    """
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')
    result = {}

    # <title> holds the page name with a " | Facebook" suffix
    title = soup.find('title')
    if title:
        result['name'] = title.text.replace(' | Facebook', '').strip()

    # <meta name="description"> carries the about/description blurb
    meta_desc = soup.find('meta', {'name': 'description'})
    if meta_desc:
        result['description'] = meta_desc.get('content', '')

    # Timeline posts carry a data-ft attribute with JSON story metadata
    posts = []
    for post_div in soup.find_all('div', {'data-ft': True})[:10]:
        try:
            data_ft = json.loads(post_div.get('data-ft', '{}'))
            text_divs = post_div.find_all('p')
            text = ' '.join(p.text.strip() for p in text_divs)
            if text:
                posts.append({
                    'text': text[:500],  # cap stored text per post
                    'story_id': data_ft.get('mf_story_key', ''),
                })
        # Narrow catch: malformed data-ft JSON or unexpected node shapes.
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit
        # and hide genuine bugs.
        except (ValueError, TypeError, AttributeError):
            continue
    result['posts'] = posts
    return result
Method 2: Facebook Graph API — Public Data That Still Works
Contrary to popular belief, some Graph API endpoints still work without app review:
import requests
def get_public_page_info(page_id: str, access_token: str = None) -> dict:
    """
    Get basic public page info via the Graph API.

    page_id: page username or numeric ID.
    access_token: optional token; when omitted, an app access token is
        fetched with the hard-coded client credentials (no app review
        needed for public data).
    Returns the decoded JSON response (may contain an 'error' key on
    API failure).
    """
    # Get an app access token if not provided
    if not access_token:
        # This token type works for public data — no user auth needed.
        # Get your app credentials from developers.facebook.com (free).
        CLIENT_ID = "your_app_id"
        CLIENT_SECRET = "your_app_secret"
        r = requests.get(
            "https://graph.facebook.com/oauth/access_token",
            params={
                "client_id": CLIENT_ID,
                "client_secret": CLIENT_SECRET,
                "grant_type": "client_credentials"
            },
            timeout=30,  # requests has no default timeout — never hang forever
        )
        access_token = r.json().get('access_token')

    # Fields that are publicly accessible with an app token
    fields = "id,name,category,about,fan_count,phone,email,website,location"
    r = requests.get(
        f"https://graph.facebook.com/v19.0/{page_id}",
        params={"fields": fields, "access_token": access_token},
        timeout=30,
    )
    return r.json()
def get_page_posts_via_api(page_id: str, access_token: str, limit: int = 25) -> list:
    """
    Get recent posts from a public page via the Graph API.

    page_id: page username or numeric ID.
    access_token: Graph API token (page token for full content, app
        token for basic data).
    limit: maximum number of posts to request.
    Returns a list of post dicts; empty on error or when the page
    restricts its feed.
    """
    r = requests.get(
        f"https://graph.facebook.com/v19.0/{page_id}/posts",
        params={
            "fields": "id,message,created_time,likes.summary(true),comments.summary(true)",
            "limit": limit,
            "access_token": access_token
        },
        timeout=30,  # requests has no default timeout — never hang forever
    )
    data = r.json()
    return data.get('data', [])
Method 3: Facebook Ads Library API (No Restrictions!)
The Ads Library is a transparency tool that's fully open — no app review, no restrictions:
import requests
def search_facebook_ads(
    search_terms: str,
    country: str = "US",
    ad_type: str = "ALL",
    limit: int = 100,
    access_token: str = None,
) -> list:
    """
    Search the Facebook Ads Library.

    search_terms: free-text query (brand, product, keyword).
    country: ISO country code for ad_reached_countries.
    ad_type: Ads Library ad_type filter ("ALL", "POLITICAL_AND_ISSUE_ADS", ...).
    limit: maximum total number of ads to return.
    access_token: optional token override; defaults to the constant below.

    Requires: facebook.com account + access token from
    https://www.facebook.com/ads/library/api/
    (Free, no app review needed)

    Returns at most `limit` ad dicts.
    """
    ACCESS_TOKEN = access_token or "your_ads_library_token"
    url = "https://graph.facebook.com/v19.0/ads_archive"
    params = {
        "access_token": ACCESS_TOKEN,
        "search_terms": search_terms,
        "ad_reached_countries": [country],
        "ad_type": ad_type,
        "fields": "id,ad_creation_time,ad_creative_body,ad_creative_link_caption,ad_creative_link_description,ad_creative_link_title,ad_delivery_start_time,ad_delivery_stop_time,demographic_distribution,impressions,page_id,page_name,region_distribution,spend",
        "limit": limit,
    }
    all_ads = []
    while True:
        r = requests.get(url, params=params, timeout=30)
        data = r.json()
        all_ads.extend(data.get('data', []))
        # Cursor-based pagination: follow the 'after' cursor until the
        # API runs out of pages or we have collected enough ads.
        paging = data.get('paging', {})
        next_cursor = paging.get('cursors', {}).get('after')
        if not next_cursor or len(all_ads) >= limit:
            break
        params['after'] = next_cursor
    # The last page can push us past `limit` — truncate so callers get
    # exactly what they asked for.
    return all_ads[:limit]
# Use case: See competitor advertising strategy
ads = search_facebook_ads("competitor brand name", country="US")
for ad in ads[:5]:
    page_name = ad.get('page_name')
    body_preview = ad.get('ad_creative_body', '')[:200]
    impressions = ad.get('impressions', {})
    print(f"Page: {page_name}")
    print(f"Body: {body_preview}")
    print(f"Impressions: {impressions}")
    print()
Method 4: Playwright for Pages Requiring Login
For data that needs a logged-in session (private groups, profile data with permission):
from playwright.sync_api import sync_playwright
import json, time
# YOU NEED: A Facebook account whose cookies are exported
# Use Cookie Editor browser extension to export as JSON
def scrape_with_facebook_session(url: str, cookies_path: str) -> str:
    """
    Scrape a Facebook URL using saved session cookies.

    url: page to fetch (must be visible to the cookie-holding account).
    cookies_path: path to a JSON cookie export (e.g. from the Cookie
        Editor browser extension).
    Returns the fully rendered page HTML.
    """
    # Read cookies BEFORE launching a browser so a bad path fails fast
    # without spinning up Chromium.
    with open(cookies_path) as f:
        cookies = json.load(f)
    # Playwright needs name/value/domain/path; pin the domain so
    # host-scoped exports still apply across facebook.com.
    playwright_cookies = [
        {
            'name': c['name'],
            'value': c['value'],
            'domain': '.facebook.com',
            'path': '/',
        }
        for c in cookies
    ]
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
            )
            context.add_cookies(playwright_cookies)
            page = context.new_page()
            page.goto(url, wait_until="networkidle")
            time.sleep(2)  # let late-loading XHR content settle
            return page.content()
        finally:
            # Close even if goto/add_cookies raises — the original leaked
            # the browser process on any exception.
            browser.close()
Handling Facebook's Anti-Bot
Facebook has strong bot detection for logged-in sessions:
import time, random
def rate_limited_facebook_scrape(urls: list, delay_range=(5, 15)) -> list:
    """
    Scrape Facebook URLs with conservative rate limiting.

    Facebook tracks session behavior heavily — slow down significantly.
    urls: list of URLs to fetch.
    delay_range: (min, max) seconds to sleep between requests.
    Returns a list of {'url', 'html'} dicts for successful fetches.
    """
    results = []
    session = cf_requests.Session()
    for i, url in enumerate(urls):
        # Warm up with homepage visit on new session (every 20 pages) —
        # sessions that only hit deep links look like bots.
        if i % 20 == 0:
            session.get("https://www.facebook.com/", impersonate="chrome124")
            time.sleep(random.uniform(3, 6))
        response = session.get(url, impersonate="chrome124",
                               headers={"Referer": "https://www.facebook.com/"})
        if response.status_code == 429:
            # Back off, then retry the SAME url once — the original
            # slept but silently dropped the rate-limited page.
            print("Rate limited — taking a long break")
            time.sleep(random.uniform(60, 120))
            response = session.get(url, impersonate="chrome124",
                                   headers={"Referer": "https://www.facebook.com/"})
        if response.status_code == 200:
            results.append({'url': url, 'html': response.text})
        # Much longer delays than other sites — Facebook tracks timing patterns
        time.sleep(random.uniform(*delay_range))
    return results
Conservative rate limits:
- 1 request / 5-15 seconds (much slower than other sites)
- Max 50-100 pages per hour per IP
- Use residential proxies, rotate every 30-50 requests
- Facebook bans IPs and device fingerprints, not just accounts
Practical Use Cases
Competitor monitoring:
# Track competitor page growth and post frequency
competitor_pages = ["brand-a", "brand-b", "brand-c"]
for page in competitor_pages:
    info = get_public_page_info(page, access_token)
    posts = get_page_posts_via_api(page, access_token, limit=10)
    print(f"{info.get('name')}: {info.get('fan_count', 0):,} fans")
    print(f" Recent posts: {len(posts)}")
    # Guard: a page with no recent posts would otherwise raise
    # ZeroDivisionError on the average.
    if posts:
        total_likes = sum(
            p.get('likes', {}).get('summary', {}).get('total_count', 0)
            for p in posts
        )
        print(f" Avg likes: {total_likes / len(posts):.0f}")
Lead generation from public groups:
# Public groups often have members' names + job titles visible
# Combine with LinkedIn to enrich contact data
# (ethical: only use for outreach if they posted publicly in a business context)
Ad intelligence via Ads Library:
# Track competitor ad spend patterns
ads = search_facebook_ads("competitor product name")
active_ads = []
for ad in ads:
    # An ad without a delivery stop time is still running
    if not ad.get('ad_delivery_stop_time'):
        active_ads.append(ad)
print(f"Competitor has {len(active_ads)} active ads currently")
What the Graph API Still Gives You Free
Despite restrictions, these work with a basic app token:
| Endpoint | What You Get |
|---|---|
| `/{page-id}` | Name, category, about, fan count, contact info |
| `/{page-id}/photos` | Public photos (limited) |
| `/{page-id}/events` | Public upcoming events |
| `/{page-id}/ratings` | Star ratings + reviews (if enabled) |
| `/ads_archive` | Full ads transparency data (no review needed) |
| `/{page-id}/feed` | Recent posts (varies by page settings) |
Related Articles
- Web Scraping Without Getting Banned in 2026 — Full anti-detection playbook
- How to Extract B2B Contact Information at Scale — Contact data extraction pipeline
Get the Complete Apify Scrapers Bundle
Save 10+ hours of setup time. The Apify Scrapers Bundle ($29) includes ready-to-use scrapers for Amazon, Google Maps, LinkedIn, TikTok, Instagram and 30+ more platforms — with documentation, example inputs, and output schemas.
Top comments (0)