DEV Community

agenthustler
agenthustler

Posted on

Crunchbase Funding Data: Track Startup Investments with Python

Crunchbase is the definitive source for startup funding data — investment rounds, investors, acquisitions, and company profiles. Tracking this data helps investors, analysts, and founders understand market dynamics and spot emerging trends.

What Funding Data Can You Extract?

  • Funding rounds (seed, Series A-F, IPO)
  • Investor names and portfolios
  • Company valuations and revenue estimates
  • Acquisition data and exit events
  • Founder and executive profiles
  • Industry and geographic trends

Scraping Crunchbase Company Data

import requests
from bs4 import BeautifulSoup
import json
import time
import re

class CrunchbaseScraper:
    """Minimal HTML scraper for public Crunchbase pages.

    NOTE(review): Crunchbase uses aggressive anti-bot protection, so plain
    requests like these may be blocked in practice — confirm access strategy.
    """

    BASE_URL = "https://www.crunchbase.com"
    TIMEOUT = 15  # seconds; prevents hanging forever on a stalled connection

    def __init__(self):
        # One Session reuses connections and carries the shared headers.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'Accept': 'text/html,application/xhtml+xml',
        })

    def get_company_profile(self, company_slug):
        """Get company overview from Crunchbase.

        Args:
            company_slug: URL slug, e.g. "openai" for /organization/openai.

        Returns:
            dict with 'name', 'description', 'url', 'founded' (empty strings
            when no JSON-LD block parses) plus 'funding_text' when found.

        Raises:
            requests.HTTPError: on non-2xx responses (e.g. 403/404), instead
                of silently parsing an error page.
        """
        url = f"{self.BASE_URL}/organization/{company_slug}"
        resp = self.session.get(url, timeout=self.TIMEOUT)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        # Prefer the machine-readable JSON-LD block; keep the first one
        # that parses cleanly.
        ld_data = {}
        for script in soup.find_all('script', type='application/ld+json'):
            try:
                ld_data = json.loads(script.string)
                break
            except (json.JSONDecodeError, TypeError):
                continue

        profile = {
            'name': ld_data.get('name', ''),
            'description': ld_data.get('description', ''),
            'url': ld_data.get('url', ''),
            'founded': ld_data.get('foundingDate', ''),
        }

        # Fallback: grab any funding-related text rendered in the HTML.
        funding_section = soup.select_one('[class*="funding"]')
        if funding_section:
            profile['funding_text'] = funding_section.get_text(strip=True)

        return profile

    def search_companies(self, query):
        """Search for companies by name or keyword.

        Returns:
            list of {'name', 'description', 'url'} dicts; 'url' is '' when
            the result has no usable link.
        """
        url = f"{self.BASE_URL}/textsearch"
        resp = self.session.get(url, params={'q': query}, timeout=self.TIMEOUT)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        results = []
        for item in soup.select('[class*="search-result"]'):
            name = item.select_one('[class*="name"]')
            desc = item.select_one('[class*="description"]')
            link = item.select_one('a')

            if not name:
                continue
            # link.get avoids a KeyError on anchors without href; only
            # prefix BASE_URL when the href is relative.
            href = link.get('href', '') if link else ''
            if href and not href.startswith('http'):
                href = self.BASE_URL + href
            results.append({
                'name': name.get_text(strip=True),
                'description': desc.get_text(strip=True) if desc else '',
                'url': href,
            })

        return results
(End of code listing.)

Building a Funding Tracker

import pandas as pd
from datetime import datetime

class FundingTracker:
    """Collects company profiles via a scraper and appends them to a CSV."""

    def __init__(self, scraper, db_file='funding_data.csv', delay=3.0):
        """
        Args:
            scraper: object exposing ``get_company_profile(slug) -> dict``.
            db_file: CSV file collected rows are appended to.
            delay: seconds to sleep between requests (rate limiting);
                defaults to the original hard-coded 3 seconds.
        """
        self.scraper = scraper
        self.db_file = db_file
        self.delay = delay

    def track_companies(self, company_slugs):
        """Collect and store funding data for multiple companies.

        Returns:
            DataFrame of the rows collected in this call; failed slugs are
            reported and skipped, not recorded.
        """
        records = []
        for slug in company_slugs:
            try:
                profile = self.scraper.get_company_profile(slug)
                profile['slug'] = slug
                profile['collected_at'] = datetime.now().isoformat()
                records.append(profile)
                print(f"  Collected: {profile.get('name', slug)}")
            except Exception as e:
                # Best-effort batch: log and continue with the next slug.
                print(f"  Error with {slug}: {e}")
            time.sleep(self.delay)  # Respect rate limits

        df = pd.DataFrame(records)
        # Bug fix: the original always passed header=False, so a brand-new
        # CSV had no header row and analyze_sector's read_csv could not
        # access columns by name. Write the header only on first creation.
        import os
        write_header = not os.path.exists(self.db_file)
        df.to_csv(self.db_file, mode='a', header=write_header, index=False)
        return df

    def analyze_sector(self, sector_companies):
        """Analyze funding patterns in a sector.

        Reads the whole CSV back and filters to the given slugs.
        """
        df = pd.read_csv(self.db_file)
        sector_df = df[df['slug'].isin(sector_companies)]

        print(f"Sector analysis ({len(sector_df)} companies):")
        print(f"  Companies tracked: {sector_df['name'].nunique()}")

        return sector_df
(End of code listing.)

Investment Pattern Analysis

def analyze_investment_patterns(funding_data):
    """Find patterns in startup funding data.

    Args:
        funding_data: iterable of per-deal records (dicts). Recognized keys
            are 'funding_amount', 'stage', and 'investors' (comma-separated
            names string); each is optional.

    Returns:
        The data as an unmodified DataFrame, for further analysis.
    """
    df = pd.DataFrame(funding_data)

    # Bug fix: the original grouped by 'stage' unconditionally whenever
    # 'funding_amount' existed and raised KeyError when 'stage' was absent.
    if 'funding_amount' in df.columns and 'stage' in df.columns:
        # Average round size by stage
        stage_avg = df.groupby('stage')['funding_amount'].agg(['mean', 'median', 'count'])
        print("Funding by stage:")
        print(stage_avg.to_string())

    # Top investors by deal count
    if 'funding_amount' in df.columns and 'investors' in df.columns:
        # Investor cells hold comma-separated names; flatten then count.
        all_investors = []
        for inv_list in df['investors'].dropna():
            if isinstance(inv_list, str):
                all_investors.extend(inv_list.split(', '))

        from collections import Counter
        top_investors = Counter(all_investors).most_common(15)
        print("\nMost active investors:")
        for investor, count in top_investors:
            print(f"  {investor}: {count} deals")

    return df

def detect_hot_sectors(funding_data, window_days=90):
    """Identify sectors with increasing funding activity.

    Args:
        funding_data: iterable of deal records using 'date', 'category',
            'name', and optionally 'funding_amount'.
        window_days: size of the trailing window to inspect.

    Returns:
        DataFrame indexed by category with 'deals' and 'total_funding'
        columns (total_funding is 0 when amounts are absent), sorted by
        deal count; an empty DataFrame when there is no 'category' column.
    """
    df = pd.DataFrame(funding_data)
    # If 'date' is missing, DataFrame.get returns the scalar default and
    # "now" is broadcast onto every row.
    df['date'] = pd.to_datetime(df.get('date', datetime.now()))

    recent = df[df['date'] > datetime.now() - pd.Timedelta(days=window_days)]

    if 'category' not in recent.columns:
        return pd.DataFrame()

    # Bug fix: the original summed 'funding_amount' unconditionally and
    # raised KeyError when that column was absent.
    if 'funding_amount' not in recent.columns:
        recent = recent.assign(funding_amount=0)

    sector_activity = recent.groupby('category').agg(
        deals=('name', 'count'),
        total_funding=('funding_amount', 'sum')
    ).sort_values('deals', ascending=False)

    print(f"Hot sectors (last {window_days} days):")
    print(sector_activity.head(10).to_string())

    return sector_activity
(End of code listing.)

Building Investor Intelligence

def build_investor_profile(funding_data, investor_name):
    """Build a profile of an investor's activity.

    Args:
        funding_data: iterable of deal records with 'name' and an
            'investors' field (comma-separated names), plus optional
            'stage' and 'category'.
        investor_name: name to match (case-insensitive substring).

    Returns:
        dict with 'name', 'total_deals', 'companies', and (when the
        columns exist) 'preferred_stages' / 'preferred_sectors'; an empty
        dict when nothing matches.
    """
    df = pd.DataFrame(funding_data)

    # Bug fix: df.get('investors', '') returns the plain string '' when the
    # column is missing, and ''.str.contains(...) raises AttributeError.
    # Guard for the column explicitly instead.
    if 'investors' not in df.columns:
        print(f"No deals found for {investor_name}")
        return {}

    # Filter deals involving this investor
    investor_deals = df[
        df['investors'].str.contains(investor_name, na=False, case=False)
    ]

    if investor_deals.empty:
        print(f"No deals found for {investor_name}")
        return {}

    profile = {
        'name': investor_name,
        'total_deals': len(investor_deals),
        'companies': investor_deals['name'].tolist(),
    }

    if 'stage' in investor_deals.columns:
        profile['preferred_stages'] = investor_deals['stage'].value_counts().to_dict()

    if 'category' in investor_deals.columns:
        profile['preferred_sectors'] = investor_deals['category'].value_counts().to_dict()

    print(f"Investor: {investor_name}")
    print(f"  Total deals: {profile['total_deals']}")
    print(f"  Recent companies: {', '.join(profile['companies'][:5])}")

    return profile
(End of code listing.)

Scaling with Cloud Solutions

For comprehensive Crunchbase data covering thousands of companies, the Crunchbase Scraper on Apify handles anti-bot measures and data structuring at scale.

Proxy rotation is essential for Crunchbase scraping — ScrapeOps provides intelligent proxy management and request monitoring.

Conclusion

Crunchbase funding data reveals the hidden dynamics of startup ecosystems — where money flows, which sectors heat up, and which investors lead trends. Build your pipeline to track companies, analyze patterns, and detect emerging sectors. Start with targeted company tracking, then scale to sector-wide monitoring for comprehensive investment intelligence.

Top comments (0)