Crunchbase is the definitive source for startup funding data — investment rounds, investors, acquisitions, and company profiles. Tracking this data helps investors, analysts, and founders understand market dynamics and spot emerging trends.
What Funding Data Can You Extract?
- Funding rounds (seed, Series A-F, IPO)
- Investor names and portfolios
- Company valuations and revenue estimates
- Acquisition data and exit events
- Founder and executive profiles
- Industry and geographic trends
Scraping Crunchbase Company Data
import requests
from bs4 import BeautifulSoup
import json
import time
import re
class CrunchbaseScraper:
    """Fetch and parse public Crunchbase HTML pages.

    The scraper keeps one ``requests.Session`` with browser-like headers and
    extracts data from the JSON-LD ``<script>`` tag and from elements whose
    CSS class contains a given keyword. The CSS selectors are heuristic
    substring matches, so results depend on the markup the site serves.
    """

    BASE_URL = "https://www.crunchbase.com"

    def __init__(self, timeout=30):
        """Create the HTTP session.

        Args:
            timeout: Seconds to wait for each HTTP response. Without a
                timeout ``requests`` can block indefinitely on a stalled
                connection.
        """
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'Accept': 'text/html,application/xhtml+xml',
        })

    @staticmethod
    def _first_ld_object(soup):
        """Return the first JSON-LD object found in ``soup`` as a dict, or ``{}``."""
        for script in soup.find_all('script', type='application/ld+json'):
            try:
                data = json.loads(script.string)
            except (json.JSONDecodeError, TypeError):
                # Empty or malformed script tag: try the next one.
                continue
            # A JSON-LD payload may be one object or an array of objects;
            # only a dict supports the ``.get`` lookups done by the caller.
            candidates = data if isinstance(data, list) else [data]
            for candidate in candidates:
                if isinstance(candidate, dict):
                    return candidate
        return {}

    def get_company_profile(self, company_slug):
        """Get company overview from Crunchbase.

        Args:
            company_slug: Last path segment of the organization URL.

        Returns:
            dict with ``name``, ``description``, ``url`` and ``founded``
            (empty strings when the JSON-LD lacks them), plus
            ``funding_text`` when a funding-related element is present.

        Raises:
            requests.RequestException: on connection errors, timeouts, or a
                4xx/5xx response (so an error page is never parsed as a
                profile).
        """
        url = f"{self.BASE_URL}/organization/{company_slug}"
        resp = self.session.get(url, timeout=self.timeout)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        # Extract structured data
        ld_data = self._first_ld_object(soup)

        # Extract key metrics
        profile = {
            'name': ld_data.get('name', ''),
            'description': ld_data.get('description', ''),
            'url': ld_data.get('url', ''),
            'founded': ld_data.get('foundingDate', ''),
        }

        # Parse funding info from page
        funding_section = soup.select_one('[class*="funding"]')
        if funding_section is not None:
            profile['funding_text'] = funding_section.get_text(strip=True)
        return profile

    def search_companies(self, query):
        """Search for companies by name or keyword.

        Args:
            query: Free-text search string.

        Returns:
            list of dicts with ``name``, ``description`` and ``url``; ``url``
            is an empty string when the result has no link.

        Raises:
            requests.RequestException: on connection errors, timeouts, or a
                4xx/5xx response.
        """
        from urllib.parse import urljoin

        resp = self.session.get(
            f"{self.BASE_URL}/textsearch",
            params={'q': query},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        results = []
        for item in soup.select('[class*="search-result"]'):
            name = item.select_one('[class*="name"]')
            if name is None:
                continue
            desc = item.select_one('[class*="description"]')
            # Only consider anchors that actually carry an href.
            link = item.select_one('a[href]')
            results.append({
                'name': name.get_text(strip=True),
                'description': desc.get_text(strip=True) if desc is not None else '',
                # urljoin leaves absolute hrefs intact and resolves relative ones.
                'url': urljoin(self.BASE_URL, link['href']) if link is not None else '',
            })
        return results
Building a Funding Tracker
import pandas as pd
from datetime import datetime
class FundingTracker:
    """Collect company profiles through a scraper and persist them to a CSV file.

    Rows are appended to ``db_file`` with a fixed column layout so that
    batches with and without optional fields stay aligned and the file can
    be read back with ``pandas.read_csv``.
    """

    # Column layout of the CSV store. Profile keys not listed here are not
    # persisted; listed keys missing from a profile are written as empty.
    CSV_COLUMNS = (
        'slug',
        'name',
        'description',
        'url',
        'founded',
        'funding_text',
        'collected_at',
    )

    def __init__(self, scraper, db_file='funding_data.csv', delay=3):
        """Store the collaborators.

        Args:
            scraper: Object exposing ``get_company_profile(slug) -> dict``.
            db_file: Path of the CSV file used as the data store.
            delay: Seconds to pause between consecutive requests.
        """
        self.scraper = scraper
        self.db_file = db_file
        self.delay = delay

    def track_companies(self, company_slugs):
        """Collect and store funding data for multiple companies.

        A failure for one company is reported and skipped so the rest of the
        batch is still collected.

        Args:
            company_slugs: Iterable of company slugs to fetch.

        Returns:
            DataFrame holding the profiles collected in this call (empty when
            nothing could be collected, in which case the file is untouched).
        """
        import os

        records = []
        for position, slug in enumerate(company_slugs):
            if position and self.delay > 0:
                time.sleep(self.delay)  # Respect rate limits
            try:
                profile = self.scraper.get_company_profile(slug)
                profile['slug'] = slug
                profile['collected_at'] = datetime.now().isoformat()
                records.append(profile)
                print(f" Collected: {profile.get('name', slug)}")
            except Exception as e:  # best-effort: keep going with the next slug
                print(f" Error with {slug}: {e}")

        df = pd.DataFrame(records)
        if df.empty:
            return df

        # The header must be written exactly once (on a new or empty file);
        # otherwise read_csv would treat the first data row as column names.
        needs_header = (
            not os.path.exists(self.db_file)
            or os.path.getsize(self.db_file) == 0
        )
        df.reindex(columns=list(self.CSV_COLUMNS)).to_csv(
            self.db_file, mode='a', header=needs_header, index=False
        )
        return df

    def analyze_sector(self, sector_companies):
        """Analyze funding patterns in a sector.

        Args:
            sector_companies: Collection of slugs that make up the sector.

        Returns:
            DataFrame with the stored rows whose ``slug`` is in
            ``sector_companies``.

        Raises:
            FileNotFoundError: if ``db_file`` does not exist yet.
        """
        df = pd.read_csv(self.db_file)
        sector_df = df[df['slug'].isin(sector_companies)]
        print(f"Sector analysis ({len(sector_df)} companies):")
        print(f" Companies tracked: {sector_df['name'].nunique()}")
        return sector_df
Investment Pattern Analysis
def analyze_investment_patterns(funding_data):
    """Find patterns in startup funding data.

    Prints round-size statistics per stage (when both ``funding_amount`` and
    ``stage`` columns exist) and the 15 most frequently named investors
    (when an ``investors`` column exists).

    Args:
        funding_data: Anything ``pandas.DataFrame`` accepts, typically a list
            of dicts. ``investors`` values may be a comma-separated string or
            a list/tuple/set of names; other value types are ignored.

    Returns:
        The data as a DataFrame.
    """
    from collections import Counter

    df = pd.DataFrame(funding_data)

    # Both columns are required: grouping by a missing 'stage' would raise.
    if {'funding_amount', 'stage'} <= set(df.columns):
        # Average round size by stage
        stage_avg = df.groupby('stage')['funding_amount'].agg(['mean', 'median', 'count'])
        print("Funding by stage:")
        print(stage_avg.to_string())

    # Top investors by deal count
    if 'investors' in df.columns:
        all_investors = []
        for entry in df['investors'].dropna():
            if isinstance(entry, str):
                names = entry.split(',')
            elif isinstance(entry, (list, tuple, set)):
                names = entry
            else:
                continue
            # Strip whitespace so "a, b" and "a,b" count the same investors,
            # and drop empty fragments such as those from a trailing comma.
            cleaned = (str(name).strip() for name in names)
            all_investors.extend(name for name in cleaned if name)

        top_investors = Counter(all_investors).most_common(15)
        print("\nMost active investors:")
        for investor, count in top_investors:
            print(f" {investor}: {count} deals")

    return df
def detect_hot_sectors(funding_data, window_days=90):
    """Identify sectors with increasing funding activity.

    Keeps the deals dated within the last ``window_days`` days and ranks
    categories by number of deals.

    Args:
        funding_data: Anything ``pandas.DataFrame`` accepts, typically a list
            of dicts. When there is no ``date`` column every row is treated
            as recent; rows with a missing date are excluded.
        window_days: Size of the look-back window in days.

    Returns:
        DataFrame indexed by ``category`` with ``deals`` and
        ``total_funding`` columns, sorted by ``deals`` descending. An empty
        DataFrame when there is no ``category`` column.
    """
    df = pd.DataFrame(funding_data)

    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])
        # Evaluate "now" once so the cutoff is a single fixed instant.
        cutoff = pd.Timestamp.now() - pd.Timedelta(days=window_days)
        recent = df[df['date'] > cutoff]
    else:
        recent = df

    if 'category' not in recent.columns:
        return pd.DataFrame()

    # The aggregation below needs 'name' and 'funding_amount'; supply neutral
    # stand-ins when the data lacks them instead of failing with KeyError.
    fillers = {}
    if 'name' not in recent.columns:
        # Counting the category itself yields the number of rows per group.
        fillers['name'] = recent['category']
    if 'funding_amount' not in recent.columns:
        fillers['funding_amount'] = 0.0
    if fillers:
        recent = recent.assign(**fillers)

    sector_activity = recent.groupby('category').agg(
        deals=('name', 'count'),
        total_funding=('funding_amount', 'sum')
    ).sort_values('deals', ascending=False)
    print(f"Hot sectors (last {window_days} days):")
    print(sector_activity.head(10).to_string())
    return sector_activity
Building Investor Intelligence
def build_investor_profile(funding_data, investor_name):
    """Build a profile of an investor's activity.

    A deal is attributed to the investor when ``investor_name`` occurs,
    case-insensitively and as literal text, in the deal's ``investors``
    value (a string, or a list/tuple/set of names).

    Args:
        funding_data: Anything ``pandas.DataFrame`` accepts, typically a list
            of dicts.
        investor_name: Name, or part of a name, to look for.

    Returns:
        dict with ``name``, ``total_deals`` and ``companies``, plus
        ``preferred_stages`` / ``preferred_sectors`` when the ``stage`` /
        ``category`` columns exist. An empty dict when no deal matches or
        the data has no ``investors`` column.
    """
    df = pd.DataFrame(funding_data)
    needle = str(investor_name).casefold()

    def mentions(value):
        # Plain substring test: investor names often contain regex
        # metacharacters such as '.', '(' or '+', which must match literally.
        if isinstance(value, str):
            return needle in value.casefold()
        if isinstance(value, (list, tuple, set)):
            return any(needle in str(item).casefold() for item in value)
        return False  # missing values and other types never match

    # Filter deals involving this investor
    if 'investors' in df.columns:
        investor_deals = df[df['investors'].map(mentions).astype(bool)]
    else:
        investor_deals = df.iloc[0:0]

    if investor_deals.empty:
        print(f"No deals found for {investor_name}")
        return {}

    has_names = 'name' in investor_deals.columns
    profile = {
        'name': investor_name,
        'total_deals': len(investor_deals),
        'companies': investor_deals['name'].tolist() if has_names else [],
    }
    if 'stage' in investor_deals.columns:
        profile['preferred_stages'] = investor_deals['stage'].value_counts().to_dict()
    if 'category' in investor_deals.columns:
        profile['preferred_sectors'] = investor_deals['category'].value_counts().to_dict()

    print(f"Investor: {investor_name}")
    print(f" Total deals: {profile['total_deals']}")
    # map(str, ...) keeps the summary printable when a company name is missing.
    print(f" Recent companies: {', '.join(map(str, profile['companies'][:5]))}")
    return profile
Scaling with Cloud Solutions
For comprehensive Crunchbase data covering thousands of companies, the Crunchbase Scraper on Apify handles anti-bot measures and data structuring at scale.
Proxy rotation is essential for Crunchbase scraping — ScrapeOps provides intelligent proxy management and request monitoring.
Conclusion
Crunchbase funding data reveals the hidden dynamics of startup ecosystems — where money flows, which sectors heat up, and which investors lead trends. Build your pipeline to track companies, analyze patterns, and detect emerging sectors. Start with targeted company tracking, then scale to sector-wide monitoring for comprehensive investment intelligence.
Top comments (0)