Scraping Sustainable Investment ESG Ratings
ESG (Environmental, Social, Governance) ratings drive trillions of dollars in sustainable investment decisions. But these ratings are scattered across providers that use different methodologies. Let's build a scraper that aggregates ESG data.
ESG Data Sources
- Yahoo Finance — Free ESG risk scores for public companies (lower = better)
- MSCI ESG Ratings — Industry standard
- SEC EDGAR — ESG-related filings
- CDP — Climate disclosure data
Setting Up
pip install requests beautifulsoup4 pandas yfinance matplotlib
Yahoo Finance ESG Scores
import yfinance as yf
import pandas as pd
# Output column -> field name looked up in the yfinance sustainability table.
_ESG_FIELDS = {
    "total_esg": "totalEsg",
    "environment": "environmentScore",
    "social": "socialScore",
    "governance": "governanceScore",
}


def get_esg_scores(tickers):
    """Fetch Yahoo Finance ESG scores for each ticker.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        A DataFrame with one row per ticker and the columns ``ticker``,
        ``total_esg``, ``environment``, ``social`` and ``governance``.
        Scores that are unavailable are left missing. All columns are
        present even when no ticker has data or ``tickers`` is empty, so
        callers can always select them.
    """
    rows = []
    for ticker in tickers:
        # Start every row with the full schema so rows without data still
        # contribute all score columns.
        row = {"ticker": ticker, **dict.fromkeys(_ESG_FIELDS)}
        esg = yf.Ticker(ticker).sustainability
        if esg is not None and not esg.empty:
            # Read the value column by position rather than by the label
            # "Value". NOTE(review): the label appears to differ between
            # yfinance releases - confirm against the installed version.
            values = esg.iloc[:, 0].to_dict()
            for column, field in _ESG_FIELDS.items():
                row[column] = values.get(field)
        rows.append(row)
    return pd.DataFrame(rows, columns=["ticker", *_ESG_FIELDS])
# Sample tickers used for the demo run.
tech_tickers = [
    "AAPL",
    "MSFT",
    "GOOGL",
    "AMZN",
    "META",
    "NVDA",
    "TSLA",
]

# Fetch the scores once; esg_df is reused by later snippets.
esg_df = get_esg_scores(tickers=tech_tickers)

# Print the table without the positional index column.
print(esg_df.to_string(index=False))
Scraping MSCI ESG Ratings
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
def scrape_msci_esg(company_name, *, api_key="YOUR_SCRAPERAPI_KEY", timeout=70):
    """Look up a company's MSCI ESG rating via ScraperAPI.

    Args:
        company_name: Company name to pass to MSCI's search tool.
        api_key: ScraperAPI key sent with the request.
        timeout: Seconds to wait for the response before giving up.
            Without a timeout, ``requests`` can block indefinitely.

    Returns:
        ``{"company": ..., "msci_rating": ...}`` when the rendered page
        contains an ``.esg-rating-value`` element, otherwise ``None``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status from ScraperAPI.
    """
    # Escape the name so characters such as "&", "#" or spaces cannot
    # break the query string of the target URL.
    target_url = (
        "https://www.msci.com/our-solutions/esg-investing/"
        "esg-ratings-climate-search-tool?p=" + quote(company_name, safe="")
    )
    params = {
        "api_key": api_key,
        "url": target_url,
        "render": "true",
        "wait_for_selector": ".esg-rating",
    }
    resp = requests.get(
        "https://api.scraperapi.com", params=params, timeout=timeout
    )
    # Fail loudly on error responses instead of parsing an error page and
    # reporting it as "no rating found".
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    rating_elem = soup.select_one(".esg-rating-value")
    if rating_elem is None:
        return None
    return {
        "company": company_name,
        "msci_rating": rating_elem.get_text(strip=True),
    }
ScraperAPI handles the JavaScript rendering that MSCI's search tool requires.
ESG Visualization
import matplotlib.pyplot as plt
def plot_esg_comparison(df, output_path="esg_comparison.png"):
    """Save a three-panel horizontal bar chart of ESG category scores.

    Args:
        df: DataFrame with a ``ticker`` column and, where available,
            ``environment``, ``social`` and ``governance`` columns.
        output_path: Where the PNG is written.

    Rows with a missing value for a category are omitted from that
    category's panel. A category column that is absent altogether
    produces an empty, labelled panel instead of raising ``KeyError``.
    """
    categories = ["environment", "social", "governance"]
    colors = ["#2ecc71", "#3498db", "#9b59b6"]

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    try:
        for ax, cat, color in zip(axes, categories, colors):
            if cat in df.columns:
                valid = df.dropna(subset=[cat])
                ax.barh(valid["ticker"], valid[cat], color=color)
            ax.set_title(f"{cat.title()} Score")
            ax.set_xlabel("Risk Score (lower = better)")
        fig.suptitle("ESG Risk Scores Comparison", fontsize=14)
        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # pyplot keeps figures alive until they are closed; release this
        # one so repeated calls do not accumulate open figures.
        plt.close(fig)
# Draw the three-panel chart for the scores held in esg_df.
plot_esg_comparison(esg_df)
Portfolio ESG Screening
def screen_portfolio(tickers, max_esg_risk=25):
    """Print which tickers pass a maximum total-ESG-risk screen.

    Args:
        tickers: Iterable of ticker symbols to screen.
        max_esg_risk: Inclusive upper bound on ``total_esg``; tickers
            scoring at or below it pass.

    Tickers without a ``total_esg`` value are excluded from both the
    passed and failed counts.
    """
    df = get_esg_scores(tickers)
    if "total_esg" in df.columns:
        df = df.dropna(subset=["total_esg"])
        passed = df[df["total_esg"] <= max_esg_risk]
        failed = df[df["total_esg"] > max_esg_risk]
    else:
        # The frame can come back without the column (e.g. no tickers
        # were supplied); report zero results rather than raise KeyError.
        passed = failed = df.iloc[0:0]

    print(f"Passed ESG screen ({max_esg_risk} threshold): {len(passed)}")
    for _, row in passed.iterrows():
        print(f" {row['ticker']}: {row['total_esg']:.1f}")
    print(f"Failed: {len(failed)}")
# Screen a sample portfolio using the default max_esg_risk threshold.
screen_portfolio(["AAPL", "MSFT", "JNJ", "XOM", "CVX", "NEE", "TSLA"])
Scale with ThorData proxies and monitor with ScrapeOps.
Key Takeaways
- Yahoo Finance offers free ESG scores for most public companies
- MSCI ratings require JavaScript rendering to scrape
- Composite scoring from multiple sources gives more reliable ratings
- Portfolio screening automates sustainable investment analysis
ESG ratings are opinions, not facts. Use multiple sources and understand each methodology.
Top comments (0)