Regulatory compliance is a constant challenge. New rules, amendments, and guidance documents drop regularly across multiple agencies. Missing a change can mean fines, lawsuits, or lost licenses. Here's how to automate regulatory monitoring.
The Compliance Challenge
Financial services alone deals with 200+ regulatory bodies globally. Each publishes updates on different websites, in different formats, on different schedules. Manual monitoring doesn't scale.
Regulatory Monitor
pip install requests beautifulsoup4 pandas schedule

(`difflib`, `hashlib`, and `json` ship with Python's standard library — they are not installed via pip.)
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import hashlib
import json
import difflib
class RegulatoryMonitor:
    """Poll regulator web pages through a scraping proxy and detect changes.

    Snapshots (item lists, content hashes) are persisted to a JSON state
    file so consecutive runs — even across process restarts — can be
    compared to surface newly published items and document edits.
    """

    def __init__(self, api_key):
        """api_key: ScraperAPI key used by ``fetch`` for every request."""
        self.api_key = api_key
        self.state_file = "regulatory_state.json"
        self.state = self._load_state()

    def _load_state(self):
        """Return the persisted state dict, or ``{}`` on first run.

        A missing or corrupt state file is treated as a first run rather
        than crashing the monitor.
        """
        try:
            with open(self.state_file) as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}

    def _save_state(self):
        """Persist ``self.state`` to the JSON state file."""
        with open(self.state_file, "w") as f:
            json.dump(self.state, f, indent=2)

    def fetch(self, url):
        """GET *url* via the ScraperAPI proxy.

        Raises ``requests.HTTPError`` on a non-2xx response so callers
        never parse an error page as regulatory content.
        """
        # Let requests URL-encode the target URL. The original f-string
        # concatenation broke whenever the target itself carried query
        # parameters (e.g. the EUR-Lex search URL).
        resp = requests.get(
            "http://api.scraperapi.com",
            params={"api_key": self.api_key, "url": url},
            timeout=30,
        )
        resp.raise_for_status()
        return resp

    def check_regulatory_page(self, name, url, selectors):
        """Scrape one listing page and diff it against the stored snapshot.

        Args:
            name: stable identifier used as the state key and item source.
            url: listing page to scrape.
            selectors: CSS selectors — ``{"container": ..., "title": ...,
                "date": ...}``; ``"date"`` is optional.

        Returns:
            ``{"changed": bool, "new_items": list, "total": int}`` where
            ``new_items`` is empty on the very first check (nothing to
            diff against, avoids flooding alerts).
        """
        resp = self.fetch(url)
        soup = BeautifulSoup(resp.text, "html.parser")
        # The original passed "" to select_one when no date selector was
        # configured, which raises a selector syntax error in soupsieve.
        date_selector = selectors.get("date")
        items = []
        for el in soup.select(selectors["container"]):
            title = el.select_one(selectors["title"])
            if not title:
                continue
            date = el.select_one(date_selector) if date_selector else None
            link = el.select_one("a")
            items.append({
                "title": title.text.strip(),
                "date": date.text.strip() if date else "",
                "url": link.get("href", "") if link else "",
                "source": name,
            })
        # Change detection: hash the normalized item list so any edit
        # (title, date, link) flips the fingerprint.
        current_hash = hashlib.md5(
            json.dumps(items, sort_keys=True).encode()
        ).hexdigest()
        previous_hash = self.state.get(name, {}).get("hash", "")
        is_changed = current_hash != previous_hash
        new_items = []
        if is_changed and previous_hash:
            # Report only additions, keyed by title.
            prev_titles = {
                i["title"] for i in self.state.get(name, {}).get("items", [])
            }
            new_items = [i for i in items if i["title"] not in prev_titles]
        self.state[name] = {
            "hash": current_hash,
            "items": items,
            "last_checked": str(datetime.now()),
        }
        self._save_state()
        return {"changed": is_changed, "new_items": new_items, "total": len(items)}

    def monitor_federal_register(self):
        """Check the Federal Register current-documents listing."""
        return self.check_regulatory_page(
            "Federal Register",
            "https://www.federalregister.gov/documents/current",
            {"container": ".document-wrapper", "title": "h5 a", "date": ".metadata .date"}
        )

    def monitor_sec(self):
        """Check the SEC proposed-rules listing."""
        return self.check_regulatory_page(
            "SEC",
            "https://www.sec.gov/rules/proposed.shtml",
            {"container": ".views-row", "title": ".views-field-title", "date": ".date-display-single"}
        )

    def monitor_eu_regulations(self):
        """Check the EUR-Lex act search results."""
        return self.check_regulatory_page(
            "EUR-Lex",
            "https://eur-lex.europa.eu/search.html?type=act",
            {"container": ".SearchResult", "title": ".title", "date": ".date"}
        )

    def track_document_changes(self, name, url):
        """Diff the full text of a single document against the last run.

        Returns the added lines of a unified diff (lines starting with
        "+", excluding the "+++" header); empty on first run or when the
        text is unchanged. Updates the stored snapshot either way.
        """
        resp = self.fetch(url)
        soup = BeautifulSoup(resp.text, "html.parser")
        content = soup.get_text(strip=True)
        prev_content = self.state.get(f"doc_{name}", {}).get("content", "")
        if prev_content and content != prev_content:
            diff = list(difflib.unified_diff(
                prev_content.splitlines(),
                content.splitlines(),
                lineterm=""
            ))
            changes = [l for l in diff if l.startswith("+") and not l.startswith("+++")]
        else:
            changes = []
        self.state[f"doc_{name}"] = {
            "content": content,
            "last_checked": str(datetime.now())
        }
        self._save_state()
        return changes
# Usage: run every configured monitor once and print a one-line summary
# per source, plus up to three newly detected items.
monitor = RegulatoryMonitor("YOUR_SCRAPERAPI_KEY")
sources = [
    ("Federal Register", monitor.monitor_federal_register),
    ("SEC", monitor.monitor_sec),
    ("EU Regulations", monitor.monitor_eu_regulations),
]
for source_name, run_check in sources:
    outcome = run_check()
    status = "CHANGED" if outcome["changed"] else "No changes"
    print(f"{source_name}: {status} ({outcome['total']} items)")
    # Slicing an empty list iterates zero times, so no guard is needed.
    for item in outcome["new_items"][:3]:
        print(f"  NEW: {item['title']}")
Keyword Filtering for Your Industry
def filter_relevant_changes(monitor, industry_keywords):
    """Run the configured monitors and keep only new items whose title
    mentions one of *industry_keywords* (case-insensitive substring match).

    Each matched item gains a ``"matched_keywords"`` list before being
    returned.
    """
    relevant = []
    checks = {
        "Federal Register": monitor.monitor_federal_register,
        "SEC": monitor.monitor_sec,
    }
    for source_name, run_check in checks.items():
        outcome = run_check()
        for item in outcome.get("new_items", []):
            haystack = item["title"].lower()
            hits = [kw for kw in industry_keywords if kw.lower() in haystack]
            if not hits:
                continue
            item["matched_keywords"] = hits
            relevant.append(item)
    return relevant
# Industry-specific watch list — extend to match your compliance scope.
keywords = [
    "data privacy",
    "cybersecurity",
    "AI",
    "machine learning",
    "consumer protection",
    "financial reporting",
]
relevant = filter_relevant_changes(monitor, keywords)
Proxy Requirements
Government sites often have strict rate limiting. ScraperAPI handles these reliably. For monitoring international regulators, ThorData provides country-specific IPs. Track monitoring uptime with ScrapeOps.
Conclusion
Automated regulatory monitoring transforms compliance from reactive firefighting to proactive management. Start with the regulators most relevant to your industry, add keyword filters, and build alerting on top. The cost of missing a regulatory change far exceeds the cost of building this system.
Top comments (0)