Why Qualify Leads with Social Data
Not all leads are equal. A signup from a CEO with 50K followers is worth more than an anonymous Gmail address. Social profile data lets you score and prioritize leads automatically, so your sales team focuses on high-value prospects.
Let's build a lead qualification engine that enriches contact data with social signals.
What We Extract
- Professional title and company
- Social following and engagement rates
- Content themes and interests
- Activity level and recency
- Network connections and influence score
Setup
pip install requests beautifulsoup4 pandas
Social platforms have aggressive bot detection. ScraperAPI handles proxy rotation and browser fingerprinting.
The Profile Scraper
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import datetime
SCRAPER_API_KEY = "YOUR_KEY"
def scrape_social_profile(platform, username):
    """Fetch a public profile page through ScraperAPI and parse it.

    Args:
        platform: one of "twitter", "github", "dev".
        username: the profile handle on that platform.

    Returns:
        A dict of parsed profile fields, or None for an unsupported
        platform or a failed request.
    """
    urls = {
        "twitter": f"https://nitter.net/{username}",
        "github": f"https://github.com/{username}",
        "dev": f"https://dev.to/{username}",
    }
    target = urls.get(platform)
    if not target:
        return None
    # Let requests build the query string so the target URL is
    # percent-encoded correctly (the raw f-string version broke for
    # any target containing its own query parameters).
    response = requests.get(
        "http://api.scraperapi.com",
        params={"api_key": SCRAPER_API_KEY, "url": target},
        # ScraperAPI can take a while on heavily protected pages, but
        # without a timeout a stalled request hangs the whole batch.
        timeout=70,
    )
    if not response.ok:
        # Blocked or nonexistent profile: treat as "not found" so the
        # caller's scoring just sees one fewer platform.
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    parsers = {
        "github": parse_github_profile,
        "twitter": parse_twitter_profile,
        "dev": parse_dev_profile,
    }
    return parsers[platform](soup, username)
Platform-Specific Parsers
def parse_github_profile(soup, username):
    """Extract name, bio, follower/repo counts and company from a
    parsed GitHub profile page."""
    def first_text(selector):
        # Selectors carry legacy + current GitHub markup variants.
        node = soup.select_one(selector)
        return node.text.strip() if node else None

    return {
        "platform": "github",
        "username": username,
        "name": first_text(".vcard-fullname, [itemprop=name]"),
        "bio": first_text(".user-profile-bio, [data-bio-text]"),
        "followers": extract_number(soup.select_one("a[href$=followers] .text-bold")),
        "repos": extract_number(soup.select_one(".UnderlineNav-body a .Counter")),
        "company": first_text(".vcard-detail [itemprop=worksFor]"),
    }
def parse_twitter_profile(soup, username):
    """Extract name, bio and follower count from a parsed
    Nitter (Twitter mirror) profile page."""
    def first_text(selector):
        node = soup.select_one(selector)
        return node.text.strip() if node else None

    return {
        "platform": "twitter",
        "username": username,
        "name": first_text(".profile-card-fullname"),
        "bio": first_text(".profile-card-bio"),
        "followers": extract_number(soup.select_one(".profile-stat-num")),
    }
def parse_dev_profile(soup, username):
    """Extract display name and bio from a parsed dev.to profile page."""
    profile = {"platform": "dev", "username": username}
    for field, selector in (
        ("name", ".profile-header__name, h1"),
        ("bio", ".profile-header__bio, .js-user-info"),
    ):
        node = soup.select_one(selector)
        profile[field] = node.text.strip() if node else None
    return profile
def extract_number(el):
    """Parse a count like "1,234", "1.2k" or "3.4m" from an element's text.

    GitHub and Nitter abbreviate large counts ("1.2k" followers); the
    naive digit regex read that as 1, drastically under-counting.
    Expands k/m/b suffixes; plain integers parse unchanged.

    Args:
        el: a parsed element with a ``.text`` attribute, or None.

    Returns:
        The count as an int, or 0 if nothing parseable is found.
    """
    if not el:
        return 0
    text = el.text.strip().lower().replace(",", "")
    match = re.search(r"(\d+(?:\.\d+)?)\s*([kmb]?)", text)
    if not match:
        return 0
    value = float(match.group(1))
    multiplier = {"k": 1_000, "m": 1_000_000, "b": 1_000_000_000}.get(match.group(2), 1)
    return int(value * multiplier)
Lead Scoring Engine
def score_lead(profiles):
    """Score a lead from its enriched social profiles.

    Point buckets: followers (0-30), company presence (0-25),
    bio completeness (0-15), multi-platform presence (0-20),
    repo activity (0-10).

    Args:
        profiles: list of profile dicts from the platform parsers.

    Returns:
        Dict with "score" (int), "grade" (letter A-F) and "reasons"
        (list of human-readable scoring justifications).
    """
    score = 0
    reasons = []

    followers = sum(profile.get("followers", 0) for profile in profiles)
    # Follower tiers; only the highest matching tier awards points.
    for threshold, points, label in (
        (10000, 30, "High influence"),
        (1000, 20, "Medium influence"),
        (100, 10, "Some influence"),
    ):
        if followers > threshold:
            score += points
            reasons.append(f"{label}: {followers} followers")
            break

    # Any profile listing an employer is a strong qualification signal.
    companies = [profile["company"] for profile in profiles if profile.get("company")]
    if companies:
        score += 25
        reasons.append(f"Company: {companies[0]}")

    # A filled-in bio suggests a maintained, professional account.
    if any(profile.get("bio") for profile in profiles):
        score += 15
        reasons.append("Has professional bio")

    # 10 points per platform found, capped at 20.
    platform_count = len(profiles)
    score += min(20, platform_count * 10)
    reasons.append(f"Active on {platform_count} platforms")

    # Public repo count serves as an activity proxy for builders.
    total_repos = sum(profile.get("repos", 0) for profile in profiles)
    if total_repos > 10:
        score += 10
        reasons.append(f"Active builder: {total_repos} repos")

    return {
        "score": score,
        "grade": grade_lead(score),
        "reasons": reasons,
    }
def grade_lead(score):
    """Map a numeric lead score to a letter grade, A (best) through F."""
    for cutoff, grade in ((80, "A"), (60, "B"), (40, "C"), (20, "D")):
        if score >= cutoff:
            return grade
    return "F"
Batch Lead Qualification
import time
def qualify_leads(leads):
    """Enrich and score a batch of leads via their social profiles.

    Args:
        leads: list of dicts, each with an "email" key and a "social"
            dict mapping platform name -> username.

    Returns:
        pandas DataFrame sorted by score descending, with columns
        email, score, grade, reasons, profiles_found.
    """
    columns = ["email", "score", "grade", "reasons", "profiles_found"]
    results = []
    for lead in leads:
        profiles = []
        for platform, username in lead.get("social", {}).items():
            try:
                profile = scrape_social_profile(platform, username)
                if profile:
                    profiles.append(profile)
            except Exception as e:
                print(f"Error scraping {platform}/{username}: {e}")
            # Rate-limit every request, not only successful ones —
            # the original slept inside `if profile:`, so missing or
            # erroring profiles hit the API back-to-back.
            time.sleep(1)
        scoring = score_lead(profiles)
        results.append({
            "email": lead.get("email"),
            "score": scoring["score"],
            "grade": scoring["grade"],
            "reasons": "; ".join(scoring["reasons"]),
            "profiles_found": len(profiles),
        })
    if not results:
        # An empty DataFrame has no "score" column, so sort_values
        # would raise KeyError; return a typed empty frame instead.
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(results).sort_values("score", ascending=False)
# Example usage: enrich and score a small batch of leads.
# NOTE(review): this runs live — each social handle triggers one
# ScraperAPI request plus a 1-second sleep inside qualify_leads,
# so three leads with five handles takes several seconds minimum.
leads = [
    {"email": "alice@company.com", "social": {"github": "alice-dev", "twitter": "alice"}},
    {"email": "bob@startup.io", "social": {"github": "bobcoder"}},
    {"email": "carol@bigcorp.com", "social": {"twitter": "carol_exec", "github": "carol"}}
]
df = qualify_leads(leads)
# Sorted best-first; to_string prints all rows without truncation.
print(df.to_string())
Output Example
email score grade reasons profiles_found
alice@company.com 75 B Medium influence; Has bio; 2 platforms 2
carol@bigcorp.com 65 B Company: BigCorp; Has bio; 2 platforms 2
bob@startup.io 35 C Some influence; Active builder 1
Proxy Strategy
Social platforms are the hardest sites to scrape. Use ThorData residential proxies for platforms that block datacenter IPs. Monitor success rates with ScrapeOps.
Ethical Considerations
- Only scrape publicly available profiles
- Respect robots.txt and rate limits
- Comply with GDPR and CCPA for lead data
- Never scrape private or protected accounts
- Store enrichment data securely with encryption
Conclusion
Automated lead qualification with social data lets you prioritize high-value prospects without manual research. With ScraperAPI handling the scraping infrastructure, you can focus on building the scoring logic that matters for your business.
Top comments (0)