Software license changes can break your product overnight. Remember when HashiCorp moved from MPL to BSL? Build a monitoring system that catches license changes before they catch you.
Why This Matters
Between 2021 and 2024, several major open-source projects changed licenses — Elasticsearch (2021), Terraform (2023), Redis (2024). Each time, companies scrambled to assess the impact. Automated monitoring gives you early warning.
Tracking GitHub License Changes
import requests
import pandas as pd
from datetime import datetime
import json
import os

# Credentials are read from the environment so real keys never need to be
# written into this script; the second argument is the fallback value.
# Get a ScraperAPI key at https://www.scraperapi.com?fp_ref=the52
API_KEY = os.environ.get("SCRAPERAPI_KEY", "YOUR_SCRAPERAPI_KEY")
# Optional, for higher GitHub rate limits. The fallback is deliberately empty:
# any non-empty value (including a placeholder string) is sent as an
# Authorization header by check_github_license, so "no token" must be falsy.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
def check_github_license(owner, repo):
    """Look up the license GitHub reports for ``owner/repo``.

    Queries ``https://api.github.com/repos/{owner}/{repo}/license`` and
    condenses the answer into a flat record.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        A dict with the keys ``repo``, ``license_key`` (the ``spdx_id``
        field), ``license_name`` and ``checked_at`` (ISO-8601 timestamp
        taken from ``datetime.now()``), or ``None`` when the response
        status is not 200.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/license"
    # Only attach credentials when a token is actually configured.
    headers = {'Authorization': f'token {GITHUB_TOKEN}'} if GITHUB_TOKEN else {}
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return None

    # The ``license`` object and the fields inside it can be present but
    # JSON null; ``or`` covers both "missing" and "null" so the record
    # always carries a string instead of raising or leaking None.
    license_data = response.json().get('license') or {}
    return {
        'repo': f'{owner}/{repo}',
        'license_key': license_data.get('spdx_id') or 'Unknown',
        'license_name': license_data.get('name') or 'Unknown',
        'checked_at': datetime.now().isoformat(),
    }
# Repositories to watch, written as (owner, repo) pairs.
critical_deps = [
    ('redis', 'redis'),
    ('hashicorp', 'terraform'),
    ('elastic', 'elasticsearch'),
    ('docker', 'compose'),
    ('grafana', 'grafana'),
    ('apache', 'kafka'),
    ('mongodb', 'mongo'),
    ('cockroachdb', 'cockroach'),
]

# Query every repository; lookups that come back empty are skipped.
results = []
for owner, repo in critical_deps:
    license_info = check_github_license(owner, repo)
    if not license_info:
        continue
    results.append(license_info)
    print(f"{owner}/{repo}: {license_info['license_key']}")

# Keep this run as a CSV snapshot (one row per successful lookup).
df = pd.DataFrame(results)
df.to_csv('license_snapshot.csv', index=False)
Detecting Changes
Compare snapshots to detect license modifications:
import sqlite3
# Module-level SQLite connection to 'license_monitor.db'; detect_changes reads
# earlier snapshots from it and appends each new snapshot to it.
conn = sqlite3.connect('license_monitor.db')
def detect_changes(current_snapshot, connection=None):
    """Compare a snapshot with stored history and record it.

    For every row, the most recent stored ``license_key`` for the same
    ``repo`` (ordered by ``checked_at``) is compared with the row's value.
    Afterwards the whole snapshot is appended to the ``licenses`` table.

    Args:
        current_snapshot: DataFrame with at least the columns ``repo`` and
            ``license_key``; all of its columns are written to the table.
        connection: SQLite connection to use. Defaults to the module-level
            ``conn`` when omitted.

    Returns:
        A list of dicts with the keys ``repo``, ``old_license``,
        ``new_license`` and ``detected_at``, one per repository whose
        license key differs from the latest stored one. Empty when nothing
        changed, when there is no history yet, or when the snapshot is
        empty.
    """
    # An empty snapshot has nothing to compare, and a column-less frame
    # cannot be written as a table, so leave the database untouched.
    if current_snapshot.empty:
        return []

    db = conn if connection is None else connection

    # On the very first run the table does not exist yet; querying it would
    # raise, so check once up front and skip the comparison in that case.
    has_history = db.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'licenses'"
    ).fetchone() is not None

    alerts = []
    if has_history:
        for _, row in current_snapshot.iterrows():
            # Bound parameter instead of string formatting: repo names come
            # from outside and may contain quotes.
            previous = db.execute(
                "SELECT license_key FROM licenses WHERE repo = ? "
                "ORDER BY checked_at DESC LIMIT 1",
                (row['repo'],),
            ).fetchone()
            if previous is not None and previous[0] != row['license_key']:
                alerts.append({
                    'repo': row['repo'],
                    'old_license': previous[0],
                    'new_license': row['license_key'],
                    'detected_at': datetime.now().isoformat(),
                })

    current_snapshot.to_sql('licenses', db, if_exists='append', index=False)
    return alerts
# Compare the fresh snapshot with stored history and report each difference.
changes = detect_changes(df)
for change in changes:
    message = f"ALERT: {change['repo']} changed from {change['old_license']} to {change['new_license']}"
    print(message)
Monitoring Package Registries
Check npm and PyPI for license metadata too. Use ThorData for reliable proxy rotation:
def _normalize_npm_license(metadata):
    """Reduce the license fields of an npm manifest to one string."""
    declared = metadata.get('license')
    if isinstance(declared, dict):
        # Deprecated object form: {"type": "MIT", "url": "..."}.
        declared = declared.get('type')
    if not declared:
        # Deprecated plural form: "licenses": [{"type": "MIT", ...}, ...].
        legacy = metadata.get('licenses') or []
        if isinstance(legacy, (dict, str)):
            legacy = [legacy]
        names = [
            entry.get('type') if isinstance(entry, dict) else entry
            for entry in legacy
        ]
        names = [str(name) for name in names if name]
        if len(names) == 1:
            declared = names[0]
        elif names:
            declared = "(" + " OR ".join(names) + ")"
    return str(declared) if declared else 'Unknown'


def check_npm_license(package_name):
    """Fetch license metadata for the latest version of an npm package.

    Requests ``https://registry.npmjs.org/{package_name}/latest``.

    Args:
        package_name: Name of the package on the npm registry.

    Returns:
        A dict with the keys ``package``, ``registry`` (always ``'npm'``),
        ``license`` (always a string; ``'Unknown'`` when the manifest
        declares none) and ``version``, or ``None`` when the response
        status is not 200.
    """
    url = f"https://registry.npmjs.org/{package_name}/latest"
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    data = response.json()
    return {
        'package': package_name,
        'registry': 'npm',
        # Older manifests use an object or a ``licenses`` array instead of
        # a plain string; normalise so callers can compare values directly.
        'license': _normalize_npm_license(data),
        'version': data.get('version', 'Unknown'),
    }
def check_pypi_license(package_name):
    """Fetch license metadata for a package from the PyPI JSON API.

    Requests ``https://pypi.org/pypi/{package_name}/json``.

    Args:
        package_name: Name of the project on PyPI.

    Returns:
        A dict with the keys ``package``, ``registry`` (always ``'pypi'``),
        ``license`` and ``version``, or ``None`` when the response status
        is not 200. ``license`` is the first non-empty value among the
        ``license`` field, the ``license_expression`` field and the
        ``License ::`` trove classifiers; ``'Unknown'`` if all are empty.

    Raises:
        KeyError: If the response JSON has no ``info`` object.
    """
    url = f"https://pypi.org/pypi/{package_name}/json"
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    info = response.json()['info']

    # ``license`` is frequently null or empty because many projects declare
    # their license only through ``license_expression`` or classifiers.
    license_value = info.get('license') or info.get('license_expression')
    if not license_value:
        classifier_names = [
            classifier.split(' :: ')[-1]
            for classifier in (info.get('classifiers') or [])
            if classifier.startswith('License :: ')
        ]
        license_value = ', '.join(classifier_names)

    return {
        'package': package_name,
        'registry': 'pypi',
        'license': license_value or 'Unknown',
        'version': info.get('version') or 'Unknown',
    }
# Packages to check on each registry.
npm_packages = ['express', 'react', 'next', 'fastify', 'prisma']
pypi_packages = ['django', 'flask', 'fastapi', 'celery', 'sqlalchemy']

# npm: print the declared license for every package that could be fetched.
for pkg in npm_packages:
    info = check_npm_license(pkg)
    if not info:
        continue
    print(f"npm/{pkg}: {info['license']}")

# PyPI: same report, using the PyPI lookup.
for pkg in pypi_packages:
    info = check_pypi_license(pkg)
    if not info:
        continue
    print(f"pypi/{pkg}: {info['license']}")
Scraping License Change Announcements
Monitor blog posts and changelogs for license announcements. ScrapeOps helps track scraper health:
from bs4 import BeautifulSoup
def scrape_changelog(repo_url):
    """Report whether a repository's changelog-style pages mention "license".

    Fetches ``{repo_url}/blob/main/CHANGELOG.md``,
    ``{repo_url}/blob/main/LICENSE`` and ``{repo_url}/releases`` through the
    ScraperAPI endpoint and does a case-insensitive substring search.

    Args:
        repo_url: Base URL of the repository page; a trailing slash is
            ignored.

    Returns:
        ``True`` as soon as one successfully fetched page contains the word
        "license", otherwise ``False``.

    NOTE(review): this is a coarse keyword match. A LICENSE page will
    normally contain the word regardless of whether anything changed, so
    treat ``True`` as "worth a look", not as a detected change.
    """
    base = repo_url.rstrip('/')
    page_paths = ("blob/main/CHANGELOG.md", "blob/main/LICENSE", "releases")
    for path in page_paths:
        # Pass the target as a query parameter so requests URL-encodes it;
        # HTTPS keeps the API key out of clear-text traffic.
        response = requests.get(
            "https://api.scraperapi.com/",
            params={'api_key': API_KEY, 'url': f"{base}/{path}"},
            timeout=60,
        )
        # Skip error responses: their body is an error message, not the page.
        if not response.ok:
            continue
        if 'license' in response.text.lower():
            return True
    return False
Key Takeaways
- License changes often appear in LICENSE file commits weeks before announcements
- The GitHub API reports the license it currently detects for any public repo, so polling it regularly surfaces a change soon after it is committed
- Cross-reference GitHub, npm, and PyPI — license metadata can be inconsistent
- ScraperAPI handles rate limiting and IP rotation for high-volume monitoring
Top comments (0)