B2B lead generation runs on data. The best sales teams enrich prospects with company data, verified emails, and professional profiles. Here's how to build an automated lead enrichment pipeline combining multiple data sources.
The Lead Enrichment Stack
- LinkedIn: Professional profiles, job titles, company associations
- Clearbit: Company data, technographics, funding info
- Hunter.io: Email discovery and verification
- Public sources: Company websites, press releases, SEC filings
Hunter.io Email Discovery
Hunter.io offers a generous free tier (25 searches/month) and affordable paid plans:
import requests
import time
class HunterClient:
    """Minimal client for the Hunter.io v2 API.

    Covers the three endpoints used by the pipeline: domain-wide email
    search, targeted email finding, and email verification.

    Args:
        api_key: Hunter.io API key.
        timeout: per-request timeout in seconds. `requests` has no default
            timeout, so omitting one can hang a batch indefinitely.
    """

    BASE_URL = "https://api.hunter.io/v2"

    def __init__(self, api_key, timeout=15):
        self.api_key = api_key
        self.timeout = timeout

    def _get_data(self, endpoint, **params):
        """GET `endpoint` with auth and return the JSON "data" payload.

        Raises:
            requests.HTTPError: on quota/auth/server errors, instead of
                silently returning an empty payload.
        """
        params["api_key"] = self.api_key
        resp = requests.get(
            f"{self.BASE_URL}/{endpoint}", params=params, timeout=self.timeout
        )
        resp.raise_for_status()
        return resp.json().get("data", {})

    def domain_search(self, domain, limit=10):
        """Return up to `limit` email records discovered for `domain`."""
        data = self._get_data("domain-search", domain=domain, limit=limit)
        return [{
            "email": e["value"],
            "type": e.get("type"),
            "confidence": e.get("confidence"),
            "first_name": e.get("first_name"),
            "last_name": e.get("last_name"),
            "position": e.get("position"),
        } for e in data.get("emails", [])]

    def find_email(self, domain, first_name, last_name):
        """Guess a person's address from their name and company domain."""
        data = self._get_data(
            "email-finder",
            domain=domain,
            first_name=first_name,
            last_name=last_name,
        )
        return {
            "email": data.get("email"),
            "confidence": data.get("confidence"),
            "sources": data.get("sources"),
        }

    def verify_email(self, email):
        """Check deliverability signals for a single email address."""
        data = self._get_data("email-verifier", email=email)
        return {
            "status": data.get("status"),  # valid, invalid, accept_all
            "disposable": data.get("disposable"),
            "webmail": data.get("webmail"),
        }
Clearbit Company Enrichment
class ClearbitClient:
    """Client for Clearbit's Company Enrichment API.

    Args:
        api_key: Clearbit secret key (sent as a Bearer token).
        timeout: per-request timeout in seconds; `requests` would
            otherwise wait indefinitely on a stalled connection.
    """

    def __init__(self, api_key, timeout=15):
        self.api_key = api_key
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers["Authorization"] = f"Bearer {api_key}"

    def enrich_company(self, domain):
        """Return a flattened company profile for `domain`, or None.

        Any non-200 response (not found, rate limited, async 202) is
        treated as a miss so the caller can simply skip the field.
        """
        resp = self.session.get(
            "https://company.clearbit.com/v2/companies/find",
            params={"domain": domain},
            timeout=self.timeout,
        )
        if resp.status_code != 200:
            return None
        data = resp.json()
        metrics = data.get("metrics", {})  # hoisted: used for three fields
        return {
            "name": data.get("name"),
            "domain": data.get("domain"),
            "industry": data.get("category", {}).get("industry"),
            "employee_count": metrics.get("employees"),
            "revenue_range": metrics.get("estimatedAnnualRevenue"),
            "tech_stack": data.get("tech", []),
            "funding": metrics.get("raised"),
            "location": data.get("geo", {}).get("city"),
            "description": data.get("description"),
        }
Scraping Company Websites for Context
API_KEY = "YOUR_SCRAPERAPI_KEY"

def scrape_company_page(url):
    """Fetch a company page through ScraperAPI and extract quick signals.

    Returns a dict with the fetched ``url``, a ``team_page`` link when the
    first team/about/leadership link is found, and ``tech_indicators``
    inferred from external script URLs.

    Raises:
        requests.HTTPError: if ScraperAPI returns an error status — without
            this check an error page would be parsed as the target site.
    """
    params = {
        "api_key": API_KEY,
        "url": url,
        "render": "true",  # render JS so client-side-injected scripts appear
    }
    resp = requests.get(
        "https://api.scraperapi.com", params=params, timeout=60
    )
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    info = {"url": url}

    # Record the first link that looks like a team/about/leadership page.
    for link in soup.find_all("a"):
        href = link.get("href", "").lower()
        text = link.get_text(strip=True).lower()
        if any(kw in href or kw in text for kw in ["team", "about", "leadership"]):
            info["team_page"] = link.get("href")
            break

    # External script URLs hint at the site's tech stack.
    scripts = [s.get("src", "") for s in soup.find_all("script") if s.get("src")]
    info["tech_indicators"] = detect_tech(scripts)
    return info
def detect_tech(script_urls):
    """Infer technologies in use from a page's external script URLs.

    Args:
        script_urls: iterable of script ``src`` URL strings.

    Returns:
        Sorted list of unique technology identifiers. Sorting replaces the
        original ``list(set(...))``, whose order varied across runs due to
        string hash randomization, making output deterministic.
    """
    patterns = {
        "react": "react", "vue": "vue", "angular": "angular",
        "stripe": "stripe", "intercom": "intercom",
        "hubspot": "hubspot", "segment": "segment",
        "google_analytics": "google-analytics|gtag",
    }
    # Set comprehension dedupes as we match (case-insensitive substrings).
    found = {
        name
        for url in script_urls
        for name, pattern in patterns.items()
        if re.search(pattern, url, re.I)
    }
    return sorted(found)
ScraperAPI improves the reliability of these page fetches by routing requests through rotating proxies and handling JavaScript rendering, though sites with aggressive anti-bot protections may still block some requests.
The Full Enrichment Pipeline
from bs4 import BeautifulSoup
import re
import csv
class LeadEnrichmentPipeline:
    """Orchestrates company, email, and website enrichment for B2B leads."""

    def __init__(self, hunter_key, clearbit_key, scraper_key):
        self.hunter = HunterClient(hunter_key)
        self.clearbit = ClearbitClient(clearbit_key)
        self.scraper_key = scraper_key

    def enrich_lead(self, domain, first_name=None, last_name=None):
        """Enrich a single lead identified by its company domain.

        Args:
            domain: company domain, e.g. ``"stripe.com"``.
            first_name, last_name: optional contact name; when both are
                given the targeted email finder is used, otherwise a
                domain-wide search.

        Returns:
            Dict with ``domain`` plus optional ``company``, ``email`` /
            ``emails``, and ``web_intel`` keys.
        """
        lead = {"domain": domain}

        # Step 1: company firmographics (Clearbit returns None on a miss).
        company = self.clearbit.enrich_company(domain)
        if company:
            lead["company"] = company

        # Step 2: email discovery — targeted when we know the person,
        # domain-wide otherwise.
        if first_name and last_name:
            lead["email"] = self.hunter.find_email(domain, first_name, last_name)
        else:
            lead["emails"] = self.hunter.domain_search(domain, limit=5)

        # Step 3: website intelligence via ScraperAPI.
        lead["web_intel"] = scrape_company_page(f"https://{domain}")
        time.sleep(1)  # basic rate limiting between API bursts
        return lead

    def enrich_batch(self, leads):
        """Enrich many leads sequentially.

        A failure on one lead (network error, API quota, bad domain) is
        recorded as ``{"domain": ..., "error": ...}`` instead of aborting
        the whole batch — previously a single exception lost all progress.
        """
        enriched = []
        for lead in leads:
            try:
                result = self.enrich_lead(
                    lead["domain"],
                    lead.get("first_name"),
                    lead.get("last_name"),
                )
            except Exception as exc:  # boundary: record and continue
                result = {"domain": lead.get("domain"), "error": str(exc)}
            enriched.append(result)
            time.sleep(2)
        return enriched
# Example usage
# NOTE(review): running this hits live APIs (Hunter, Clearbit, ScraperAPI)
# and consumes quota — replace the placeholder keys first.
pipeline = LeadEnrichmentPipeline(
hunter_key="YOUR_HUNTER_KEY",
clearbit_key="YOUR_CLEARBIT_KEY",
scraper_key=API_KEY,
)
# Leads with a contact name use the targeted email finder; domain-only
# leads fall back to a domain-wide search (see enrich_lead).
leads = [
{"domain": "stripe.com", "first_name": "John", "last_name": "Doe"},
{"domain": "notion.so"},
{"domain": "linear.app"},
]
enriched = pipeline.enrich_batch(leads)
Exporting to CRM Format
def export_to_csv(enriched_leads, filename="leads_enriched.csv"):
    """Write enriched leads to a CRM-ready CSV file.

    Handles both lead shapes produced by the pipeline: a single ``email``
    dict (targeted finder) and an ``emails`` list (domain-wide search).
    Previously, domain-search leads exported blank email columns even when
    addresses had been found; now the top hit is used as a fallback.

    Args:
        enriched_leads: list of dicts from LeadEnrichmentPipeline.
        filename: destination CSV path.
    """
    # newline="" is the csv-module requirement; explicit utf-8 keeps the
    # output stable across platform default encodings.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "Domain", "Company", "Industry", "Employees",
            "Revenue", "Email", "Confidence", "Tech Stack",
        ])
        for lead in enriched_leads:
            company = lead.get("company", {})
            email = lead.get("email") or {}
            if not email and lead.get("emails"):
                # Domain-search result: take the first (highest-listed) hit.
                email = lead["emails"][0]
            writer.writerow([
                lead["domain"],
                company.get("name", ""),
                company.get("industry", ""),
                company.get("employee_count", ""),
                company.get("revenue_range", ""),
                email.get("email", ""),
                email.get("confidence", ""),
                ", ".join(company.get("tech_stack", [])),
            ])
For scaling lead enrichment across thousands of companies, use ThorData for residential proxies and ScrapeOps for monitoring.
B2B lead enrichment combines multiple data sources into actionable intelligence. API-first services like Hunter.io and Clearbit provide structured data, while web scraping fills the gaps. The key is building a pipeline that enriches reliably and respects rate limits.
Happy scraping!
Top comments (0)