You make a request to a website and instead of the page you wanted, you get a "Checking your browser..." screen. Or worse, a CAPTCHA. Welcome to Cloudflare's challenge system.
Cloudflare doesn't just use one type of protection — it has multiple challenge layers, and understanding which one you're facing is the first step to handling it.
The Three Challenge Types
1. JS Challenge (Non-Interactive)
The "Checking your browser..." page. No user interaction needed — Cloudflare runs JavaScript checks and redirects automatically if your browser passes.
What it checks:
- JavaScript execution capability
- Browser fingerprint consistency
- TLS fingerprint (JA3/JA4)
- IP reputation
Request → CF Edge → JS Challenge Page →
Browser runs checks (2-5s) →
Sets cf_clearance cookie →
Redirect to actual page
2. Managed Challenge (Adaptive)
Cloudflare decides at the edge whether to:
- Pass you through silently (low risk)
- Show a Turnstile widget (medium risk)
- Show an interactive CAPTCHA (high risk)
This has been the most common challenge type from 2024 onward. The decision happens server-side based on your risk score.
3. Turnstile (CAPTCHA Widget)
A standalone CAPTCHA widget that sites embed in their forms. Unlike the other two, Turnstile is explicitly placed by the site developer — it's not auto-injected by Cloudflare.
Identifying Which Challenge You're Facing
import httpx
from urllib.parse import urlparse
def identify_cf_challenge(resp: httpx.Response) -> str:
    """Classify the Cloudflare protection seen on a response.

    Only ``resp.status_code``, ``resp.headers`` and ``resp.text`` are read.

    Args:
        resp: The response to inspect.

    Returns:
        One of the following labels:

        * ``"js_challenge"`` - a 403/503 page whose body contains
          ``jschl-answer`` or ``challenge-platform``.
        * ``"managed_challenge"`` - a 403/503 page whose body contains
          ``managed-challenge``, or one flagged by the
          ``cf-mitigated: challenge`` header without a recognised marker.
        * ``"turnstile"`` - a 200 page that embeds a Turnstile widget.
        * ``"cf_block"`` - a 403/503 carrying a ``cf-ray`` header that is
          not recognised as a challenge page.
        * ``"none"`` - nothing recognised.
    """
    status = resp.status_code

    if status in (403, 503):
        body = resp.text

        # Challenge pages may be served with either status, so the body
        # markers are checked for both rather than for 503 only.
        if "jschl-answer" in body or "challenge-platform" in body:
            return "js_challenge"
        if "managed-challenge" in body:
            return "managed_challenge"

        # Compare the header *value*. The previous check looked for the
        # header's own name inside its value, which could never match.
        mitigated = resp.headers.get("cf-mitigated", "").strip().lower()
        if mitigated == "challenge":
            return "managed_challenge"

        # Served through Cloudflare but not a challenge page.
        if "cf-ray" in resp.headers:
            return "cf_block"
        return "none"

    # Turnstile is embedded by the site in otherwise normal pages.
    if status == 200:
        body = resp.text
        if (
            "cf-turnstile" in body
            or "challenges.cloudflare.com/turnstile" in body
        ):
            return "turnstile"

    return "none"
Handling JS Challenges
JS challenges require a real browser — you can't solve them with plain HTTP requests because they need JavaScript execution.
from playwright.sync_api import sync_playwright
from playwright_stealth import stealth_sync
import time
class CloudflareBypass:
    """Obtain Cloudflare cookies with a real browser and reuse them over HTTP.

    Attributes:
        cookies: Name-to-value mapping of the cookies captured by the last
            successful ``solve_js_challenge`` call (empty until then).
        user_agent: User-Agent string used both for the browser context and
            for the follow-up ``httpx`` requests, so the two stay in sync.
    """

    def __init__(self):
        self.cookies = {}
        self.user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/121.0.0.0 Safari/537.36"
        )

    def solve_js_challenge(self, url: str, timeout: int = 30) -> dict:
        """Open *url* in Chromium and wait for the ``cf_clearance`` cookie.

        The browser context's cookies are polled once per second.

        Args:
            url: Page to open.
            timeout: Maximum number of one-second polls before giving up.

        Returns:
            Name-to-value mapping of every cookie in the browser context at
            the moment ``cf_clearance`` appeared. Also stored on
            ``self.cookies``.

        Raises:
            TimeoutError: If ``cf_clearance`` did not appear within
                *timeout* polls.
        """
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=False,  # Headed works better
                args=[
                    "--disable-blink-features="
                    "AutomationControlled"
                ],
            )
            # try/finally closes the browser on every exit path, including
            # exceptions raised by navigation or cookie polling.
            try:
                context = browser.new_context(
                    user_agent=self.user_agent,
                    viewport={"width": 1920, "height": 1080},
                    locale="en-US",
                )
                page = context.new_page()
                stealth_sync(page)

                # Navigate, then poll until the clearance cookie shows up.
                page.goto(url)
                for _ in range(timeout):
                    cookies = context.cookies()
                    if any(c["name"] == "cf_clearance" for c in cookies):
                        self.cookies = {
                            c["name"]: c["value"] for c in cookies
                        }
                        return self.cookies
                    time.sleep(1)
            finally:
                browser.close()

        raise TimeoutError(f"JS challenge not solved in {timeout}s")

    def make_request(self, url: str) -> httpx.Response:
        """GET *url* with the saved cookies and the browser's User-Agent."""
        return httpx.get(
            url,
            cookies=self.cookies,
            headers={"User-Agent": self.user_agent},
        )
Cookie Persistence
The cf_clearance cookie is valid for a configurable period (usually 15-30 minutes). Save and reuse it:
import json
from pathlib import Path
class CookieStore:
    """JSON-file cache of per-domain cookies with a time-to-live.

    The file holds one object keyed by domain. Each entry stores the
    cookies, the ``time.time()`` value at save time, and a TTL in seconds.
    """

    def __init__(self, path: str = "cookies.json"):
        """Create a store backed by the JSON file at *path*."""
        self.path = Path(path)

    def save(self, domain: str, cookies: dict, ttl: int = 1800):
        """Store *cookies* for *domain*, replacing any previous entry.

        Args:
            domain: Key under which the cookies are stored.
            cookies: JSON-serialisable cookie mapping.
            ttl: Seconds the entry stays valid (default 30 minutes).
        """
        store = self.load_all()
        store[domain] = {
            "cookies": cookies,
            "saved_at": time.time(),
            "ttl": ttl,
        }
        self.path.write_text(json.dumps(store))

    def get(self, domain: str) -> dict | None:
        """Return the cookies saved for *domain*, or ``None``.

        ``None`` is returned when there is no entry or when more than the
        entry's TTL has elapsed since it was saved.
        """
        entry = self.load_all().get(domain)
        if not entry:
            return None

        elapsed = time.time() - entry["saved_at"]
        if elapsed > entry["ttl"]:
            return None
        return entry["cookies"]

    def load_all(self) -> dict:
        """Return the whole store, or ``{}`` if it cannot be read.

        A missing, empty or otherwise unparsable file is treated as an
        empty cache rather than an error, so a damaged file only costs a
        re-solve instead of breaking every lookup.
        """
        try:
            store = json.loads(self.path.read_text())
        except (FileNotFoundError, json.JSONDecodeError):
            return {}
        # Guard against valid JSON that is not an object (e.g. a list).
        return store if isinstance(store, dict) else {}
Handling Managed Challenges
Managed challenges adapt based on your risk score. The best strategy is a tiered approach:
class ManagedChallengeHandler:
    """Fetch a page through a tiered strategy.

    Tiers, in order: cached cookies, a browser solve, then an explicit
    Turnstile solve through the captcha-solver API.
    """

    def __init__(self):
        self.cookie_store = CookieStore()
        self.cf_bypass = CloudflareBypass()
        self.captcha_solver = CaptchaSolver()

    def _fetch(self, url: str, cookies: dict) -> httpx.Response:
        """GET *url* with *cookies* and the same User-Agent the browser uses."""
        return httpx.get(
            url,
            cookies=cookies,
            headers={"User-Agent": self.cf_bypass.user_agent},
        )

    async def handle(self, url: str) -> str:
        """Return the HTML of *url*, escalating through the tiers as needed.

        Blocking work (``httpx`` calls and the synchronous Playwright
        solve) runs in a worker thread. The Playwright sync API refuses to
        run on a thread that has a running asyncio loop, and blocking HTTP
        calls would otherwise stall the loop.

        Args:
            url: Page to fetch.

        Returns:
            The response body text.

        Raises:
            ValueError: If the final tier cannot find a Turnstile sitekey.
        """
        import asyncio  # local import keeps this block self-contained

        domain = urlparse(url).netloc

        # Tier 1: Try with saved cookies
        cookies = self.cookie_store.get(domain)
        if cookies:
            resp = await asyncio.to_thread(self._fetch, url, cookies)
            if resp.status_code == 200:
                return resp.text

        # Tier 2: Solve via browser
        # (handles JS + Managed silently)
        try:
            cookies = await asyncio.to_thread(
                self.cf_bypass.solve_js_challenge, url
            )
        except TimeoutError:
            # The browser never received cf_clearance; fall through.
            pass
        else:
            self.cookie_store.save(domain, cookies)
            resp = await asyncio.to_thread(self._fetch, url, cookies)
            if resp.status_code == 200:
                return resp.text

        # Tier 3: If Turnstile appears, solve via API
        return await self.solve_turnstile_flow(url)

    async def solve_turnstile_flow(self, url: str):
        """Handle an explicit Turnstile challenge on *url*.

        Fetches the page, extracts the sitekey, asks the captcha solver for
        a token and POSTs it back to *url* as ``cf-turnstile-response``.

        Returns:
            The text of the response to the POST.

        Raises:
            ValueError: If no sitekey can be extracted from the page.
        """
        import asyncio  # local import keeps this block self-contained

        # Get the sitekey
        resp = await asyncio.to_thread(httpx.get, url)
        sitekey = extract_turnstile_sitekey(resp.text)
        if not sitekey:
            # ValueError is still caught by callers using `except Exception`.
            raise ValueError("Could not find Turnstile sitekey")

        # Solve via API
        token = await self.captcha_solver.solve(
            captcha_type="turnstile",
            sitekey=sitekey,
            url=url,
        )

        # Submit the token
        resp = await asyncio.to_thread(
            httpx.post, url, data={"cf-turnstile-response": token}
        )
        return resp.text
Handling Turnstile (Form-Embedded)
Turnstile in forms is the simplest to handle — it's just a CAPTCHA widget:
import httpx
import re
def extract_turnstile_sitekey(html: str) -> str | None:
    """Return the Turnstile sitekey embedded in *html*, or ``None``.

    Two forms are recognised, tried in order: a ``class="cf-turnstile"``
    element with a ``data-sitekey`` attribute, and a
    ``turnstile.render(..., {sitekey: '...'})`` call.
    """
    patterns = (
        r'class="cf-turnstile"[^>]*data-sitekey="([^"]+)"',
        r"turnstile\.render\([^,]+,\s*\{[^}]*sitekey:\s*['\"]([^'\"]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, html)
        if match:
            return match.group(1)
    return None


async def solve_turnstile_form(
    url: str,
    form_data: dict
) -> httpx.Response:
    """Submit a form that has Turnstile protection.

    Args:
        url: Page that hosts the form; it is both fetched and POSTed to.
        form_data: Form fields to submit. The mapping is not modified; the
            token is added to a copy.

    Returns:
        The response to the form POST.

    Raises:
        ValueError: If no Turnstile sitekey is found on the page.
    """
    # One client for both requests so cookies set by the GET accompany the
    # POST. The context manager closes the connection pool on every path.
    async with httpx.AsyncClient() as client:
        # 1. Load the page to get the sitekey
        page = await client.get(url)

        # 2. Extract Turnstile sitekey
        sitekey = extract_turnstile_sitekey(page.text)
        if not sitekey:
            raise ValueError("No Turnstile sitekey found")

        # 3. Solve
        solver = CaptchaSolver(
            api_base="https://www.passxapi.com"
        )
        token = await solver.solve(
            captcha_type="turnstile",
            sitekey=sitekey,
            url=url,
        )

        # 4. Submit form with token (on a copy of the caller's dict)
        payload = {**form_data, "cf-turnstile-response": token}
        return await client.post(url, data=payload)
TLS Fingerprinting: The Hidden Challenge
Even with cookies, Cloudflare checks your TLS fingerprint (JA3/JA4). Python's default TLS stack looks different from Chrome:
# Problem: Python's httpx/requests uses its own
# TLS fingerprint that doesn't match Chrome
# NOTE(review): `cookies` is not defined in this snippet - presumably the
# cookie dict obtained from an earlier browser solve; confirm before running.
# The two solutions below look like alternatives: each one assigns `resp`.
# Solution 1: Use curl_cffi (mimics browser TLS)
from curl_cffi import requests as cfreq
resp = cfreq.get(
"https://target-site.com",
impersonate="chrome120", # Mimics Chrome 120
cookies=cookies
)
# Solution 2: Use tls-client
# (this rebinds `resp`, replacing the Solution 1 response)
import tls_client
session = tls_client.Session(
client_identifier="chrome_120",
random_tls_extension_order=True
)
resp = session.get(
"https://target-site.com",
cookies=cookies
)
The Complete Flow
import asyncio
from dataclasses import dataclass
@dataclass
class ScrapeResult:
    """Outcome of scraping a single URL."""

    url: str  # URL that was requested
    html: str  # body text that was retrieved
    challenge_type: str  # challenge label for the first response ("none" if unchallenged)
    cookies_reused: bool  # presumably whether cached cookies served the request - TODO confirm
class CloudflareScraper:
    """Scrape URLs, delegating to ``ManagedChallengeHandler`` when challenged.

    Attributes:
        handler: Performs the tiered challenge handling.
        stats: Running counters of outcomes, keyed by outcome name.
    """

    # Challenge labels whose counter name differs from "<label>_solved".
    # Without this mapping a "js_challenge" result would be counted under
    # "js_challenge_solved" and the pre-declared "js_solved" would stay 0.
    _SOLVED_COUNTERS = {
        "js_challenge": "js_solved",
        "turnstile": "turnstile_solved",
    }

    def __init__(self):
        self.handler = ManagedChallengeHandler()
        self.stats = {
            "no_challenge": 0,
            "cookie_reuse": 0,
            "js_solved": 0,
            "turnstile_solved": 0,
            "failed": 0,
        }

    def _record_solved(self, challenge: str) -> None:
        """Increment the counter that corresponds to a solved *challenge*."""
        key = self._SOLVED_COUNTERS.get(challenge, f"{challenge}_solved")
        self.stats[key] = self.stats.get(key, 0) + 1

    async def scrape(self, url: str) -> ScrapeResult:
        """Fetch *url*, handling a Cloudflare challenge if one is detected.

        Args:
            url: Page to fetch.

        Returns:
            A ``ScrapeResult`` with the page HTML and the detected
            challenge label. ``cookies_reused`` is always ``False`` because
            the handler does not report which tier succeeded.

        Raises:
            Exception: Whatever the handler raises; the ``failed`` counter
                is incremented before re-raising.
        """
        # Try direct request first
        resp = httpx.get(url, follow_redirects=True)
        challenge = identify_cf_challenge(resp)

        if challenge == "none":
            self.stats["no_challenge"] += 1
            return ScrapeResult(
                url=url,
                html=resp.text,
                challenge_type="none",
                cookies_reused=False,
            )

        # Handle the challenge
        try:
            html = await self.handler.handle(url)
        except Exception:
            # Count the failure, then let the caller see the original error.
            self.stats["failed"] += 1
            raise

        self._record_solved(challenge)
        return ScrapeResult(
            url=url,
            html=html,
            challenge_type=challenge,
            cookies_reused=False,
        )

    async def scrape_many(
        self, urls: list[str]
    ) -> list[ScrapeResult]:
        """Scrape *urls* one after another and print the stats at the end.

        The first failure propagates and aborts the remaining URLs.
        """
        results = []
        for url in urls:
            results.append(await self.scrape(url))
        print(f"Stats: {self.stats}")
        return results
Key Takeaways
- Identify the challenge type first — JS, Managed, and Turnstile need different approaches
- JS challenges need a real browser — but you can reuse the cookies afterward
- Managed challenges are adaptive — lower your risk score with residential IPs and real fingerprints
- TLS fingerprint matters — use curl_cffi or tls-client to match browser signatures
- Cache cf_clearance cookies — they're valid for 15-30 minutes, don't re-solve every request
- Turnstile is the easiest — it's just a CAPTCHA widget you can solve via API
For solving Turnstile and other CAPTCHAs when they appear, check out passxapi-python — it handles Turnstile, reCAPTCHA, hCaptcha, and FunCaptcha with a single API.
Dealing with a specific Cloudflare challenge setup? Share your approach in the comments.
Top comments (0)