Contact data extraction is one of the most requested scraping tasks. Here's a reliable approach.
The Regex Patterns
// Matches e-mail-like tokens: a local part made of letters, digits and . _ % + -,
// then "@", a host made of letters, digits, dots and hyphens, and a final
// dot-separated label of at least two letters. The `g` flag makes
// String.prototype.match return every occurrence instead of only the first.
const EMAIL_REGEX = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
// Matches phone-like tokens in one of three shapes (tried in this order):
//   1. "+" followed by 1-3 digits (first digit non-zero), a mandatory separator
//      (whitespace, hyphen or dot), 2-4 digits optionally wrapped in
//      parentheses, then two groups of 3-4 digits with optional separators.
//   2. "(NNN)" followed by 3 digits and 4 digits, separators optional.
//   3. "NNN-NNN-NNNN" or "NNN.NNN.NNNN" on word boundaries; a hyphen or dot is
//      required between groups, so a bare run of ten digits does not match.
// The `g` flag makes String.prototype.match return every occurrence.
const PHONE_REGEX = /\+[1-9]\d{0,2}[\s\-.]\(?\d{2,4}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{3,4}|\(\d{3}\)[\s\-.]?\d{3}[\s\-.]?\d{4}|\b\d{3}[\-.]\d{3}[\-.]\d{4}\b/g;
Full Extractor
const cheerio = require('cheerio');
/**
 * Fetch a page and collect the e-mail addresses and phone numbers found on it.
 *
 * Contacts come from two places: the visible text of <body> (scanned with
 * EMAIL_REGEX and PHONE_REGEX) and the targets of `mailto:` / `tel:` links.
 * Results are de-duplicated by exact string match and keep first-seen order.
 *
 * The HTTP status is not inspected: whatever body the server returns is
 * parsed. A rejected fetch() (e.g. a network failure) propagates to the caller.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<{url: string, emails: string[], phones: string[]}>}
 */
async function extractContacts(url) {
  const res = await fetch(url, {
    headers: { 'User-Agent': 'ContactBot/1.0' },
  });
  const html = await res.text();
  const $ = cheerio.load(html);

  // Drop script/style elements so their source text is not scanned for matches.
  $('script, style').remove();
  const text = $('body').text();

  // Sets preserve insertion order and avoid a linear scan for each new link.
  const emails = new Set(text.match(EMAIL_REGEX) || []);
  const phones = new Set(text.match(PHONE_REGEX) || []);

  // mailto: links often carry addresses that never appear in the visible text.
  $('a[href^="mailto:"]').each((i, el) => {
    for (const address of parseMailtoAddresses($(el).attr('href'))) {
      emails.add(address);
    }
  });

  // tel: links; a bare "tel:" with no number is skipped rather than stored as ''.
  $('a[href^="tel:"]').each((i, el) => {
    const phone = $(el).attr('href').replace('tel:', '').trim();
    if (phone !== '') {
      phones.add(phone);
    }
  });

  return { url, emails: [...emails], phones: [...phones] };
}

/**
 * Split the target of a `mailto:` link into its individual recipient addresses.
 *
 * The query part (`?subject=...`) is discarded. Recipients are separated on
 * literal commas first and percent-decoded afterwards, so an encoded comma
 * inside an address is not mistaken for a separator. Empty entries (as
 * produced by a bare `mailto:`) are dropped.
 *
 * @param {string} href - Raw href value beginning with `mailto:`.
 * @returns {string[]} Trimmed, decoded addresses; may be empty.
 */
function parseMailtoAddresses(href) {
  const recipients = href.replace('mailto:', '').split('?')[0];
  return recipients
    .split(',')
    .map((part) => {
      try {
        return decodeURIComponent(part).trim();
      } catch {
        // Malformed percent-escape: keep the raw text instead of losing the address.
        return part.trim();
      }
    })
    .filter((address) => address !== '');
}
Crawl Multiple Pages
/**
 * Run extractContacts over a list of pages, one at a time, with a randomized
 * pause between requests.
 *
 * Only the URLs supplied by the caller are visited; nothing in this function
 * adds discovered links to the queue. Each distinct URL is visited at most
 * once, and at most `maxPages` distinct URLs are visited.
 *
 * A page whose extraction throws does not abort the crawl: it is recorded as
 * `{ url, emails: [], phones: [], error }` so that results already collected
 * are kept and the failure remains visible to the caller.
 *
 * @param {string|string[]} startUrl - One URL, or an array of URLs to visit in order.
 * @param {number} [maxPages=10] - Upper bound on the number of distinct URLs visited.
 * @returns {Promise<Array<{url: string, emails: string[], phones: string[], error?: string}>>}
 *   One entry per visited URL, in visit order.
 */
async function crawlSite(startUrl, maxPages = 10) {
  const visited = new Set();
  // Copy array input so the caller's array is not consumed by shift().
  const queue = Array.isArray(startUrl) ? [...startUrl] : [startUrl];
  const allContacts = [];

  while (queue.length > 0 && visited.size < maxPages) {
    const url = queue.shift();
    if (visited.has(url)) continue;
    visited.add(url);

    try {
      allContacts.push(await extractContacts(url));
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      allContacts.push({ url, emails: [], phones: [], error: message });
    }

    // Pause 1-3 s between requests, but only when another iteration will follow;
    // sleeping after the final page would just delay the return.
    if (queue.length > 0 && visited.size < maxPages) {
      await new Promise((resolve) => setTimeout(resolve, 1000 + Math.random() * 2000));
    }
  }

  return allContacts;
}
Filter Out Junk
/**
 * Decide whether an extracted address is worth keeping.
 *
 * An address is rejected when it is not a string, has no local part or no
 * domain around the last "@", belongs to one of the placeholder domains below
 * (or a subdomain of one), or contains "noreply". Comparison is
 * case-insensitive and ignores surrounding whitespace.
 *
 * The domain is compared as a whole rather than by substring, so real domains
 * that merely contain a placeholder name (e.g. "latest.com" contains
 * "test.com") are not discarded.
 *
 * @param {string} email - Candidate address.
 * @returns {boolean} true when the address should be kept.
 */
function isValidEmail(email) {
  if (typeof email !== 'string') return false;

  const normalized = email.trim().toLowerCase();
  const atIndex = normalized.lastIndexOf('@');
  // Require at least one character on each side of the "@".
  if (atIndex <= 0 || atIndex === normalized.length - 1) return false;

  const domain = normalized.slice(atIndex + 1);
  const junk = ['example.com', 'test.com', 'domain.com', 'email.com'];
  const isJunkDomain = junk.some((j) => domain === j || domain.endsWith(`.${j}`));

  return !isJunkDomain && !normalized.includes('noreply');
}
Use Cases
- Lead generation — find contact info for target companies
- Directory building — compile business contact databases
- Competitive analysis — identify who works at competitor companies
- Recruitment — find developer emails from GitHub/portfolio sites
Resources
- Email Extractor Tool — ready to use on Apify
- 77 Free Scrapers
Need contact data extracted? Emails, phones, social profiles from any website. $20 flat rate. Email: Spinov001@gmail.com | Hire me
Top comments (0)