How to Scrape DoorDash, Uber Eats, and Grubhub Menu Data in 2026
Food delivery platforms are among the harder scraping targets — they use aggressive anti-bot measures, require location parameters, and structure their data differently across platforms. Here's what actually works for extracting menu data, restaurant listings, and pricing.
DoorDash: Menu Data Extraction
DoorDash embeds menu data in the page's server-side rendered HTML as a JSON blob. This is the cleanest approach — no API authentication needed:
import requests, re, json
from curl_cffi import requests as cf_requests
def scrape_doordash_menu(store_url: str) -> dict:
    """Fetch a DoorDash store page and return its parsed menu.

    Expects a URL of the form:
        https://www.doordash.com/store/restaurant-name-city-12345/

    Returns {} on a non-200 response, a missing __NEXT_DATA__ blob, or
    malformed JSON; otherwise the dict built by parse_doordash_menu().
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }
    session = cf_requests.Session()
    resp = session.get(store_url, impersonate="chrome124", headers=browser_headers)
    if resp.status_code != 200:
        return {}

    # DoorDash is a Next.js app: the full menu ships server-side inside
    # the <script id="__NEXT_DATA__"> JSON blob.
    blob = re.search(
        r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
        resp.text,
        re.DOTALL,
    )
    if blob is None:
        return {}

    try:
        next_data = json.loads(blob.group(1))
    except json.JSONDecodeError:
        return {}

    # Menu data lives under props.pageProps; the exact key has moved
    # between page versions, so probe both known locations.
    page_props = next_data.get('props', {}).get('pageProps', {})
    menu = (
        page_props.get('storeMenu')
        or page_props.get('initialState', {}).get('storeMenu')
        or {}
    )
    return parse_doordash_menu(menu)
def parse_doordash_menu(raw_menu: dict) -> dict:
    """Normalize DoorDash's raw menu payload into a flat structure.

    Args:
        raw_menu: the 'storeMenu' dict pulled out of __NEXT_DATA__.

    Returns:
        {'store_name': str, 'categories': [{'name': str, 'items': [...]}]}
        Each item carries name/price/description/calories/id. Prices come
        from DoorDash in cents and are converted to dollars; a missing or
        explicit-null price becomes 0.0 instead of raising. Categories
        with no items are dropped.
    """
    result = {
        'store_name': '',
        'categories': []
    }
    # The key has varied across page versions: menuCategories vs categories.
    categories = raw_menu.get('menuCategories') or raw_menu.get('categories', [])
    for category in categories:
        cat_items = []
        for item in category.get('items', []):
            # `or 0` guards against "price": null in the payload, which
            # .get()'s default alone does not catch (None / 100 raises).
            price_cents = item.get('price') or 0
            cat_items.append({
                'name': item.get('name', ''),
                'price': price_cents / 100,  # cents -> dollars
                'description': item.get('description', ''),
                'calories': item.get('calories', ''),
                'id': item.get('id', ''),
            })
        if cat_items:  # skip empty categories
            result['categories'].append({
                'name': category.get('name', ''),
                'items': cat_items
            })
    return result
# Usage example: fetch one store and preview the first few
# categories (3) and items per category (5).
demo = scrape_doordash_menu("https://www.doordash.com/store/the-heights-deli-los-angeles-2501711/")
for section in demo.get('categories', [])[:3]:
    print(f"\n{section['name']}:")
    for dish in section['items'][:5]:
        print(f" {dish['name']}: ${dish['price']:.2f}")
DoorDash: Location-Based Restaurant Search
def search_doordash_restaurants(lat: float, lng: float, keyword: str = "") -> list:
    """Search DoorDash's unofficial feed API for restaurants near a point.

    Args:
        lat, lng: coordinates of the search center.
        keyword: kept for interface compatibility but not currently sent —
                 the feed payload below carries no keyword field. TODO:
                 confirm the API's keyword parameter name before wiring it in.

    Returns:
        List of dicts (name, id, rating, delivery_time, url); [] on any
        non-200 response or a body that is not valid JSON.
    """
    session = cf_requests.Session()
    # DoorDash uses a GraphQL-like API for search.
    api_url = "https://www.doordash.com/graphql/getRestaurantFeed"
    payload = {
        "operationName": "getRestaurantFeed",
        "variables": {
            "lat": str(lat),
            "lng": str(lng),
            "offset": 0,
            "limit": 20,
            "filters": {},
        }
    }
    headers = {
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
        "Referer": "https://www.doordash.com/",
        "Origin": "https://www.doordash.com",
    }
    response = session.post(api_url, impersonate="chrome124",
                            json=payload, headers=headers)
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        # Soft blocks often return an HTML challenge page with status 200.
        return []
    restaurants = data.get('data', {}).get('restaurantFeed', {}).get('storeHeaders', [])
    # Loop variable renamed from `r`, which previously shadowed the
    # response object of the same name.
    return [{
        'name': store.get('name', ''),
        'id': store.get('id', ''),
        'rating': store.get('averageRating', ''),
        'delivery_time': store.get('status', {}).get('pickupTime', ''),
        'url': f"https://www.doordash.com/store/{store.get('name','').lower().replace(' ','-')}-{store.get('id','')}/"
    } for store in restaurants]
Uber Eats: Menu via Embedded JSON
Uber Eats similarly embeds data in its Next.js page structure:
def scrape_ubereats_menu(restaurant_url: str) -> dict:
    """Scrape an Uber Eats restaurant menu from JSON embedded in the page.

    URL shape: https://www.ubereats.com/store/restaurant-name/UUID

    Tries the window.__REDUX_STATE__ blob first, then falls back to
    scanning every <script> tag for a menu-bearing JSON payload.
    Returns {} if neither yields parseable data.
    """
    session = cf_requests.Session()
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }
    r = session.get(restaurant_url, impersonate="chrome124", headers=headers)
    html = r.text

    # Method 1: Redux state assigned inline to window.__REDUX_STATE__.
    redux_match = re.search(r'window\.__REDUX_STATE__\s*=\s*({.*?});', html, re.DOTALL)
    if redux_match:
        try:
            state = json.loads(redux_match.group(1))
            # Navigate to the menu inside the Redux state.
            menu_data = state.get('routeData', {}).get('storeInfo', {})
            return extract_ubereats_menu(menu_data)
        except json.JSONDecodeError:
            pass  # fall through to method 2

    # Method 2: any <script> whose body looks like a menu payload.
    for script in re.findall(r'<script[^>]*>(.*?)</script>', html, re.DOTALL):
        if '"catalogSectionsMap"' in script or '"menuSections"' in script:
            try:
                data = json.loads(script)
            except json.JSONDecodeError:
                # Narrowed from a bare `except:` so genuine bugs in the
                # extraction step are no longer silently swallowed.
                continue
            return extract_ubereats_menu(data)
    return {}
def extract_ubereats_menu(data: dict) -> dict:
    """Flatten Uber Eats section/item data into {'categories': [...]}.

    Accepts either the newer 'catalogSectionsMap' layout or the older
    'menuSections' one; sections containing no items are omitted.
    """
    section_map = data.get('catalogSectionsMap') or data.get('menuSections', {})
    categories = []
    for _section_id, section in section_map.items():
        raw_items = section.get('catalogItems', section.get('items', []))
        parsed = [
            {
                'name': entry.get('title', ''),
                'price': entry.get('priceTagline', '') or str(entry.get('price', 0)),
                'description': entry.get('description', ''),
            }
            for entry in raw_items
        ]
        if parsed:
            categories.append({'name': section.get('title', ''), 'items': parsed})
    return {'categories': categories}
Grubhub: Simpler HTML Extraction
Grubhub's pages are less JavaScript-heavy and easier to parse:
from bs4 import BeautifulSoup
def scrape_grubhub_menu(restaurant_url: str) -> list:
    """Extract menu items from a Grubhub restaurant page.

    Strategy: prefer schema.org JSON-LD structured data (stable and
    unambiguous); fall back to fuzzy HTML class-name matching.
    Returns a list of item dicts, possibly empty.
    """
    session = cf_requests.Session()
    r = session.get(restaurant_url, impersonate="chrome124",
                    headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"})
    soup = BeautifulSoup(r.text, 'html.parser')
    items = []

    # Pass 1: JSON-LD (Grubhub often includes schema.org/MenuItem data).
    for script in soup.find_all('script', type='application/ld+json'):
        raw = script.string
        if not raw:
            # Empty script tag: json.loads(None) would raise TypeError,
            # which the old bare `except:` was silently hiding.
            continue
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:  # narrowed from bare `except:`
            continue
        if not isinstance(data, dict):
            continue  # JSON-LD may be a top-level list; skip those blobs
        if data.get('@type') == 'Restaurant' and 'hasMenu' in data:
            menu = data['hasMenu']
            for section in menu.get('hasMenuSection', []):
                for item in section.get('hasMenuItem', []):
                    items.append({
                        'name': item.get('name', ''),
                        'price': item.get('offers', {}).get('price', ''),
                        'description': item.get('description', ''),
                        'category': section.get('name', ''),
                    })
    if items:
        return items

    # Pass 2 (fallback): heuristic matching on class-name substrings.
    for item_div in soup.find_all('div', class_=lambda c: c and 'menuItem' in c):
        name = item_div.find(class_=lambda c: c and 'name' in c.lower())
        price = item_div.find(class_=lambda c: c and 'price' in c.lower())
        if name and price:
            items.append({
                'name': name.text.strip(),
                'price': price.text.strip(),
            })
    return items
Handling Anti-Bot on Food Delivery Apps
These platforms use progressively stronger detection:
| Platform | Detection Level | Best Approach |
|---|---|---|
| Grubhub | Basic | curl_cffi + Chrome impersonation |
| Uber Eats | Moderate | curl_cffi + residential proxy |
| DoorDash | Moderate-High | curl_cffi + proxy + session warm-up |
| Deliveroo (UK) | High | Playwright + camoufox |
import time, random
def robust_food_delivery_scrape(scrape_func, url: str, max_retries: int = 3):
    """Retry wrapper that handles rate limiting on food delivery sites.

    Args:
        scrape_func: callable taking the URL; a truthy return means success,
                     while a falsy one ({} / [] / None) is treated as a
                     soft block worth retrying.
        url: page to scrape.
        max_retries: total number of attempts.

    Returns:
        The first truthy result, or {} after exhausting all attempts.
        Sleeps with progressive backoff between attempts — but not after
        the final one (the original always slept before giving up, wasting
        up to 10 * max_retries seconds on a dead target).
    """
    for attempt in range(max_retries):
        is_last = attempt == max_retries - 1
        try:
            result = scrape_func(url)
            if result:
                return result
            # Empty result might mean a soft block.
            print(f"Empty result on attempt {attempt+1}, waiting...")
            if not is_last:
                time.sleep(10 * (attempt + 1))  # progressive backoff
        except Exception as e:
            print(f"Attempt {attempt+1} failed: {e}")
            if not is_last:
                time.sleep(5 * (attempt + 1))
    return {}
Batch Scraping Strategy
For scraping multiple restaurants:
import time, random
def scrape_restaurant_list(store_urls: list, delay_range=(3, 7)) -> list:
    """Scrape multiple restaurants with human-like pacing.

    Args:
        store_urls: restaurant page URLs; the platform is inferred from
                    each URL and dispatched to the matching scraper.
        delay_range: (min, max) seconds to sleep between requests.

    Returns:
        [{'url': ..., 'data': ...}] for every URL that yielded data.
        URLs for unrecognized platforms are skipped.
    """
    results = []
    total = len(store_urls)
    for i, url in enumerate(store_urls):
        print(f"[{i+1}/{total}] Scraping: {url[:60]}")
        # Rotate scraper function based on platform.
        if 'doordash' in url:
            data = scrape_doordash_menu(url)
        elif 'ubereats' in url:
            data = scrape_ubereats_menu(url)
        elif 'grubhub' in url:
            data = {'items': scrape_grubhub_menu(url)}
        else:
            continue
        if data:
            results.append({'url': url, 'data': data})
        if i + 1 == total:
            break  # no point pacing after the final request
        # Human-like delay between requests.
        time.sleep(random.uniform(*delay_range))
        # Longer break every 10 restaurants.
        if (i + 1) % 10 == 0:
            print("Taking a longer break...")
            time.sleep(random.uniform(30, 60))
    return results
Saving to CSV/JSON
import csv, json
def save_menu_to_csv(menu_data: dict, filename: str):
    """Flatten a parsed menu dict into one CSV row per item.

    Args:
        menu_data: parser output shaped like
                   {'store_name': ..., 'categories': [{'name', 'items'}]}.
        filename: destination path. Nothing is written when there are no
                  items.
    """
    rows = []
    store_name = menu_data.get('store_name', '')
    for category in menu_data.get('categories', []):
        for item in category.get('items', []):
            rows.append({
                'store': store_name,
                'category': category.get('name', ''),
                'item_name': item.get('name', ''),
                'price': item.get('price', ''),
                # `or ''` guards an explicit-null description: slicing
                # None raises TypeError.
                'description': (item.get('description') or '')[:100],
                'calories': item.get('calories', ''),
            })
    if rows:
        # utf-8 keeps accented menu text portable across platforms.
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows(rows)
        # Fixed: previously printed a literal placeholder instead of the
        # destination filename.
        print(f"Saved {len(rows)} items to {filename}")
Related Articles
- Web Scraping Without Getting Banned in 2026 — Full anti-detection playbook
- curl_cffi Stopped Working? Here's What to Try Next — When basic curl_cffi isn't enough
- Amazon's Anti-Bot Strategy — ML-based detection patterns
Take the next step
Skip the setup. Production-ready tools for food delivery scraping:
Apify Scrapers Bundle — $29 one-time
Instant download. Documented. Ready to deploy.
Top comments (0)