Cheerio handles static HTML, but when a page needs JavaScript rendering, a login flow, or infinite scroll, you need Playwright.
Install
npm install playwright
npx playwright install chromium
Basic Scraping
const { chromium } = require('playwright');
/**
 * Scrape the title and price of every `.item` element on a page.
 *
 * Launches headless Chromium, navigates to `url`, waits for the network to go
 * idle, and extracts the data inside the page context.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<Array<{title?: string, price?: string}>>} One entry per
 *   `.item` element; a field may be absent when the matching child element
 *   (`h2` / `.price`) is missing.
 * @throws Propagates any launch, navigation, or evaluation error.
 */
async function scrape(url) {
  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle' });
    return await page.evaluate(() =>
      Array.from(document.querySelectorAll('.item')).map((el) => ({
        title: el.querySelector('h2')?.textContent?.trim(),
        price: el.querySelector('.price')?.textContent?.trim(),
      })),
    );
  } finally {
    // Always release the browser process, even when navigation or
    // extraction fails; otherwise a failed scrape leaks a Chromium instance.
    await browser.close();
  }
}
Handle Infinite Scroll
/**
 * Scroll an infinite-scroll page to the bottom until it stops growing.
 *
 * Each round scrolls to the current bottom, waits for new content to load,
 * and compares the document height with the previous round. Scrolling stops
 * when the height is unchanged or after `maxScrolls` rounds.
 *
 * @param {import('playwright').Page} page - Page to scroll.
 * @param {number} [maxScrolls=10] - Upper bound on scroll rounds.
 * @param {number} [scrollDelayMs=2000] - Time to wait after each scroll for
 *   lazily loaded content to be appended.
 * @returns {Promise<void>}
 */
async function scrollAndScrape(page, maxScrolls = 10, scrollDelayMs = 2000) {
  let previousHeight = 0;
  for (let i = 0; i < maxScrolls; i++) {
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await page.waitForTimeout(scrollDelayMs);
    const currentHeight = await page.evaluate(() => document.body.scrollHeight);
    // No growth since the last round: nothing more was loaded, so stop.
    if (currentHeight === previousHeight) break;
    previousHeight = currentHeight;
  }
}
Wait for Dynamic Content
// Option 1: block until a marker element shows up in the DOM.
await page.waitForSelector('.results-loaded');

// Option 2: block until the page reaches the 'networkidle' load state.
await page.waitForLoadState('networkidle');

// Option 3: block until a response from the products API arrives, then read
// its body as JSON.
const isProductsResponse = (res) => res.url().includes('/api/products');
const response = await page.waitForResponse(isProductsResponse);
const data = await response.json();
Pro Tip: Intercept API Calls
Instead of parsing the rendered HTML, intercept the API calls the page makes:
// Parsed JSON bodies of every response whose URL contains '/api/'.
const apiData = [];
// Body reads that are still in flight; awaited below so apiData is complete
// before it is used.
const pendingBodies = [];

page.on('response', (response) => {
  if (!response.url().includes('/api/')) return;
  pendingBodies.push(
    response.json().then(
      (body) => {
        apiData.push(body);
      },
      (err) => {
        // Not every '/api/' response carries JSON (redirects, empty bodies,
        // HTML error pages). Report and skip it instead of leaving an
        // unhandled rejection inside the event listener.
        console.warn(`Skipping non-JSON response from ${response.url()}: ${err.message}`);
      },
    ),
  );
});

// Wait for the network to settle so API calls fired after the initial load
// are captured as well.
await page.goto(url, { waitUntil: 'networkidle' });
await Promise.all(pendingBodies);
// apiData now has structured JSON!
When to Use What
| Tool | Use When |
|---|---|
| fetch + JSON API | Site has API endpoints |
| Cheerio | Static HTML, no JS needed |
| Playwright | JS rendering, login, scroll |
Resources
Need browser automation or dynamic site scraping? $20-50 depending on complexity. Email: Spinov001@gmail.com | Hire me
Top comments (0)