DEV Community

agenthustler
agenthustler

Posted on

Scraping Carbon Credit Markets and ESG Data

Introduction

The carbon credit market exceeded $900 billion in 2025, and ESG (Environmental, Social, Governance) data drives trillions in investment decisions. Yet much of this data is scattered across registries, exchanges, and corporate reports. Building scrapers for carbon and ESG data gives you an edge in sustainability analysis and green finance.

Setup

import json
import time
from datetime import datetime
from urllib.parse import quote, urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup

# ScraperAPI proxies every request and can render JavaScript-heavy
# registry sites ("render": "true" in the per-request params below).
# Get your API key: https://www.scraperapi.com?fp_ref=the52
SCRAPER_API_KEY = "your_key_here"  # replace with your ScraperAPI key
BASE_URL = "http://api.scraperapi.com"  # all scrapes are routed through this endpoint
Enter fullscreen mode Exit fullscreen mode

Scraping Carbon Credit Registries

Public registries like Verra and Gold Standard list verified carbon offset projects:

def scrape_verra_registry(status="registered"):
    """Scrape Verra VCS carbon credit projects.

    Args:
        status: Project status to keep (case-insensitive match against the
            registry's status column), e.g. "registered". Pass a falsy value
            to return every row unfiltered.

    Returns:
        List of dicts with keys project_id, name, country, methodology,
        credits_issued, status and scraped_at.

    Raises:
        requests.HTTPError: if ScraperAPI returns an error status.
    """
    url = "https://registry.verra.org/app/search/VCS"

    params = {
        "api_key": SCRAPER_API_KEY,
        "url": url,
        "render": "true"  # the registry table is populated by JavaScript
    }

    # Fail loudly on HTTP errors instead of silently parsing an error page.
    response = requests.get(BASE_URL, params=params, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    projects = []
    for row in soup.select("table.project-table tbody tr"):
        cols = row.select("td")
        if len(cols) < 6:
            continue  # skip malformed or spanning rows
        row_status = cols[5].text.strip()
        # BUG FIX: the `status` parameter was previously accepted but ignored.
        if status and row_status.lower() != status.lower():
            continue
        projects.append({
            "project_id": cols[0].text.strip(),
            "name": cols[1].text.strip(),
            "country": cols[2].text.strip(),
            "methodology": cols[3].text.strip(),
            "credits_issued": cols[4].text.strip(),
            "status": row_status,
            "scraped_at": datetime.now().isoformat()
        })

    return projects

def scrape_gold_standard():
    """Scrape the Gold Standard project registry.

    Returns:
        List of dicts with keys name, type, country, credits and registry.
        Cards missing any expected field are skipped instead of crashing.

    Raises:
        requests.HTTPError: if ScraperAPI returns an error status.
    """
    url = "https://registry.goldstandard.org/projects"

    params = {"api_key": SCRAPER_API_KEY, "url": url, "render": "true"}
    response = requests.get(BASE_URL, params=params, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    projects = []
    for card in soup.select(".project-card"):
        fields = {
            "name": card.select_one(".project-name"),
            "type": card.select_one(".project-type"),
            "country": card.select_one(".country"),
            "credits": card.select_one(".credits"),
        }
        # BUG FIX: select_one() returns None when a selector misses; the
        # original called .text on it unconditionally and raised
        # AttributeError on any partially-rendered card.
        if any(elem is None for elem in fields.values()):
            continue
        project = {key: elem.text.strip() for key, elem in fields.items()}
        project["registry"] = "Gold Standard"
        projects.append(project)

    return projects
Enter fullscreen mode Exit fullscreen mode

Tracking Carbon Credit Prices

def scrape_carbon_prices():
    """Track carbon credit prices across markets.

    Queries each market page in turn; markets whose request fails are
    skipped so one dead source doesn't abort the whole aggregation.

    Returns:
        Dict mapping market name to {"price": str, "timestamp": iso-str}
        for every market that responded with a recognizable price element.
    """
    # Use residential proxies for financial data
    # ThorData: https://thordata.com/?via=the-data

    markets = {
        "EU_ETS": "https://ember-climate.org/data/data-tools/carbon-price-viewer/",
        "voluntary": "https://carboncredits.com/carbon-prices-today/"
    }

    prices = {}
    for market, url in markets.items():
        params = {"api_key": SCRAPER_API_KEY, "url": url, "render": "true"}
        try:
            response = requests.get(BASE_URL, params=params, timeout=60)
            response.raise_for_status()
        except requests.RequestException:
            # Best-effort aggregation: an unreachable market page
            # shouldn't prevent collecting the remaining markets.
            time.sleep(3)
            continue
        soup = BeautifulSoup(response.text, "html.parser")

        price_elem = soup.select_one(".price-value, .carbon-price")
        if price_elem:
            prices[market] = {
                "price": price_elem.text.strip(),
                "timestamp": datetime.now().isoformat()
            }
        time.sleep(3)  # polite delay between market requests

    return prices
Enter fullscreen mode Exit fullscreen mode

ESG Score Collection

Corporate ESG scores from multiple rating agencies:

def scrape_esg_scores(company_name):
    """Collect ESG scores for *company_name* from public rating sites.

    Args:
        company_name: Company name or slug to look up; it is URL-encoded
            before being interpolated into each source URL.

    Returns:
        List of dicts with keys company, source (hostname of the rating
        site) and score, one entry per source where a score was found.
    """
    # BUG FIX: the raw name was interpolated into the URLs; spaces or
    # special characters would produce a malformed request.
    encoded_name = quote(company_name)
    sources = [
        f"https://www.sustainalytics.com/esg-rating/{encoded_name}",
        f"https://www.msci.com/our-solutions/esg-investing/esg-ratings-climate-search-tool?query={encoded_name}"
    ]

    scores = []
    for url in sources:
        params = {"api_key": SCRAPER_API_KEY, "url": url, "render": "true"}
        response = requests.get(BASE_URL, params=params, timeout=60)
        soup = BeautifulSoup(response.text, "html.parser")

        score_elem = soup.select_one(".esg-score, .rating-value")
        if score_elem:
            scores.append({
                "company": company_name,
                # urlparse().netloc is the robust way to get the host,
                # replacing the fragile split("//")[1].split("/")[0] chain.
                "source": urlparse(url).netloc,
                "score": score_elem.text.strip()
            })
        time.sleep(3)  # polite delay between rating-agency requests

    return scores
Enter fullscreen mode Exit fullscreen mode

Building a Carbon Market Dashboard

def build_carbon_dashboard():
    """Aggregate carbon market data into a dashboard snapshot.

    Collects current market prices and the ten most recent Verra
    projects, writes the snapshot to carbon_dashboard.json, and
    returns it.
    """
    # Monitor scraping jobs: https://scrapeops.io/?fpr=the-data28

    snapshot = {}
    snapshot["prices"] = scrape_carbon_prices()
    snapshot["new_projects"] = scrape_verra_registry()[:10]
    snapshot["timestamp"] = datetime.now().isoformat()

    with open("carbon_dashboard.json", "w") as outfile:
        json.dump(snapshot, outfile, indent=2)

    return snapshot

def track_credit_issuance(db_path="carbon.db"):
    """Scrape the Verra registry and append the results to SQLite.

    Args:
        db_path: Path to the SQLite database file (created if missing).

    Side effects:
        Creates the `credits` table if absent, appends one row per
        scraped project, and prints how many records were stored.
    """
    import sqlite3
    from contextlib import closing

    # Only these columns exist in the table; scrape_verra_registry()
    # also returns methodology/status, which to_sql would reject.
    table_columns = ["project_id", "name", "credits_issued", "country",
                     "scraped_at"]

    # closing() guarantees the connection is released even if the
    # scrape or the insert raises (the original leaked it on error).
    with closing(sqlite3.connect(db_path)) as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS credits (
                project_id TEXT,
                name TEXT,
                credits_issued TEXT,
                country TEXT,
                scraped_at TEXT
            )
        """)

        projects = scrape_verra_registry()
        if projects:
            # BUG FIX: select only the table's columns — the full dict
            # has extra keys, and an empty scrape would otherwise build
            # a column-less DataFrame that breaks to_sql().
            df = pd.DataFrame(projects)[table_columns]
            df.to_sql("credits", conn, if_exists="append", index=False)
        conn.commit()

    print(f"Stored {len(projects)} project records")
Enter fullscreen mode Exit fullscreen mode

Conclusion

Carbon credit and ESG data scraping enables powerful analysis for investors, researchers, and sustainability professionals. As regulatory requirements grow, automated data collection becomes essential. Use ScraperAPI to reliably access these data-rich but often protected registry sites, and ThorData for residential proxy rotation when accessing geo-restricted financial data.

Top comments (0)