DEV Community

Алексей Спинов
Алексей Спинов

Posted on

How to Extract Emails and Contacts from Any Website (Node.js)

Contact data extraction is one of the most requested scraping tasks. Here's a reliable approach.

The Regex Pattern

// Matches e-mail-like tokens: a local part made of letters, digits and . _ % + -,
// an "@", a domain made of letters, digits, dots and hyphens, then a final dot
// followed by at least two letters. The `g` flag makes String.prototype.match
// return every occurrence instead of only the first.
const EMAIL_REGEX = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;

// Matches phone-number-like tokens in one of three shapes (global flag, so
// String.prototype.match returns every occurrence):
//   1. "+" and a 1-3 digit country code (first digit non-zero), a required
//      separator (whitespace, hyphen or dot), a 2-4 digit group optionally in
//      parentheses, then two groups of 3-4 digits with optional separators.
//   2. A 3-digit group in parentheses, then 3 digits and 4 digits with
//      optional separators.
//   3. 3-3-4 digits joined by a required hyphen or dot, bounded by word
//      boundaries on both sides.
const PHONE_REGEX = /\+[1-9]\d{0,2}[\s\-.]\(?\d{2,4}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{3,4}|\(\d{3}\)[\s\-.]?\d{3}[\s\-.]?\d{4}|\b\d{3}[\-.]\d{3}[\-.]\d{4}\b/g;
Enter fullscreen mode Exit fullscreen mode

Full Extractor

const cheerio = require('cheerio');

/**
 * Fetch a page and collect the e-mail addresses and phone numbers found on it.
 *
 * Contacts come from two places: the visible text of <body> (scanned with
 * EMAIL_REGEX / PHONE_REGEX) and the targets of `mailto:` / `tel:` links.
 * Results are de-duplicated and keep first-seen order (text matches first,
 * then link targets).
 *
 * The HTTP status is not inspected, so an error page is scanned like any
 * other page. Network failures from `fetch` propagate to the caller.
 *
 * @param {string} url - Address of the page to scan.
 * @returns {Promise<{url: string, emails: string[], phones: string[]}>}
 */
async function extractContacts(url) {
  const res = await fetch(url, {
    headers: { 'User-Agent': 'ContactBot/1.0' },
  });
  const html = await res.text();
  const $ = cheerio.load(html);

  // Drop <script>/<style> so inline code and CSS are not scanned as page text.
  $('script, style').remove();
  const text = $('body').text();

  // Sets give O(1) de-duplication and preserve insertion order.
  const emails = new Set(text.match(EMAIL_REGEX) || []);
  const phones = new Set(text.match(PHONE_REGEX) || []);

  // mailto: links — strip the scheme and any "?subject=..." query, then handle
  // comma-separated recipient lists and percent-encoded characters.
  $('a[href^="mailto:"]').each((i, el) => {
    const recipients = $(el).attr('href').replace('mailto:', '').split('?')[0];
    for (const raw of recipients.split(',')) {
      let address = raw;
      try {
        address = decodeURIComponent(raw);
      } catch {
        // Malformed percent-escape: fall back to the raw text rather than
        // losing the address altogether.
      }
      address = address.trim();
      // "mailto:?subject=..." has no address; never record an empty entry.
      if (address) emails.add(address);
    }
  });

  // tel: links — strip the scheme and skip links with no number.
  $('a[href^="tel:"]').each((i, el) => {
    const phone = $(el).attr('href').replace('tel:', '').trim();
    if (phone) phones.add(phone);
  });

  return { url, emails: [...emails], phones: [...phones] };
}
Enter fullscreen mode Exit fullscreen mode

Crawl Multiple Pages

/**
 * Run extractContacts over one or more pages, sequentially, with a randomized
 * pause between requests.
 *
 * No links are discovered from the fetched pages: only the URL(s) passed in
 * are visited. Pass an array to scan several pages in one call. Duplicate
 * URLs are visited once.
 *
 * @param {string|string[]} startUrl - A single URL, or a list of URLs to scan.
 * @param {number} [maxPages=10] - Upper bound on the number of pages fetched.
 * @returns {Promise<Array<{url: string, emails: string[], phones: string[]}>>}
 *   One extractContacts result per visited page, in visit order.
 */
async function crawlSite(startUrl, maxPages = 10) {
  const visited = new Set();
  const queue = [...new Set(Array.isArray(startUrl) ? startUrl : [startUrl])];
  const allContacts = [];

  while (queue.length > 0 && visited.size < maxPages) {
    const url = queue.shift();
    if (visited.has(url)) continue;

    // Pause 1-3 s *between* requests only: nothing to wait for before the
    // first request, and no pointless sleep after the last one.
    if (visited.size > 0) {
      await new Promise((resolve) => setTimeout(resolve, 1000 + Math.random() * 2000));
    }

    visited.add(url);
    allContacts.push(await extractContacts(url));
  }

  return allContacts;
}
Enter fullscreen mode Exit fullscreen mode

Filter Out Junk

/**
 * Decide whether an extracted address is worth keeping.
 *
 * Rejects addresses whose domain is a known placeholder domain (or a
 * subdomain of one) and any address containing "noreply". The comparison is
 * case-insensitive and is done on the domain part, so real domains that
 * merely contain a placeholder as a substring (e.g. "latest.com" vs
 * "test.com") are not rejected.
 *
 * @param {string} email - Candidate address.
 * @returns {boolean} true when the address should be kept; false for junk or
 *   for non-string input.
 */
function isValidEmail(email) {
  if (typeof email !== 'string') return false;

  const junk = ['example.com', 'test.com', 'domain.com', 'email.com'];
  const normalized = email.trim().toLowerCase();
  // Everything after the last "@"; the whole string when there is no "@".
  const domain = normalized.slice(normalized.lastIndexOf('@') + 1);

  const isJunkDomain = junk.some((j) => domain === j || domain.endsWith(`.${j}`));
  return !isJunkDomain && !normalized.includes('noreply');
}
Enter fullscreen mode Exit fullscreen mode

Use Cases

  1. Lead generation — find contact info for target companies
  2. Directory building — compile business contact databases
  3. Competitive analysis — identify who works at competitor companies
  4. Recruitment — find developer emails from GitHub/portfolio sites

Resources


Need contact data extracted? Emails, phones, social profiles from any website. $20 flat rate. Email: Spinov001@gmail.com | Hire me

Top comments (0)