Regulatory compliance is a constant challenge. New rules, amendments, and guidance documents drop regularly across multiple agencies. Missing a change can mean fines, lawsuits, or lost licenses. Here's how to automate regulatory monitoring.
The Compliance Challenge
Financial services alone deals with 200+ regulatory bodies globally. Each publishes updates on different websites, in different formats, on different schedules. Manual monitoring doesn't scale.
Regulatory Monitor
pip install requests beautifulsoup4 pandas schedule

(`difflib`, `hashlib`, and `json` ship with Python's standard library — they are not installed via pip.)
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import hashlib
import json
import difflib
class RegulatoryMonitor:
    """Poll regulator web pages through a scraping proxy and detect changes.

    Snapshots (item lists, content hashes) are persisted to a JSON state
    file so consecutive runs — even across process restarts — can be
    compared to surface newly published items and document edits.
    """

    def __init__(self, api_key):
        """api_key: ScraperAPI key used by ``fetch`` for every request."""
        self.api_key = api_key
        self.state_file = "regulatory_state.json"
        self.state = self._load_state()

    def _load_state(self):
        """Return the persisted state dict, or ``{}`` on first run.

        A missing or corrupt state file is treated as a first run rather
        than crashing the monitor.
        """
        try:
            with open(self.state_file) as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}

    def _save_state(self):
        """Persist ``self.state`` to the JSON state file."""
        with open(self.state_file, "w") as f:
            json.dump(self.state, f, indent=2)

    def fetch(self, url):
        """GET *url* via the ScraperAPI proxy.

        Raises ``requests.HTTPError`` on a non-2xx response so callers
        never parse an error page as regulatory content.
        """
        # Let requests URL-encode the target URL. The original f-string
        # concatenation broke whenever the target itself carried query
        # parameters (e.g. the EUR-Lex search URL).
        resp = requests.get(
            "http://api.scraperapi.com",
            params={"api_key": self.api_key, "url": url},
            timeout=30,
        )
        resp.raise_for_status()
        return resp

    def check_regulatory_page(self, name, url, selectors):
        """Scrape one listing page and diff it against the stored snapshot.

        Args:
            name: stable identifier used as the state key and item source.
            url: listing page to scrape.
            selectors: CSS selectors — ``{"container": ..., "title": ...,
                "date": ...}``; ``"date"`` is optional.

        Returns:
            ``{"changed": bool, "new_items": list, "total": int}`` where
            ``new_items`` is empty on the very first check (nothing to
            diff against, avoids flooding alerts).
        """
        resp = self.fetch(url)
        soup = BeautifulSoup(resp.text, "html.parser")
        # The original passed "" to select_one when no date selector was
        # configured, which raises a selector syntax error in soupsieve.
        date_selector = selectors.get("date")
        items = []
        for el in soup.select(selectors["container"]):
            title = el.select_one(selectors["title"])
            if not title:
                continue
            date = el.select_one(date_selector) if date_selector else None
            link = el.select_one("a")
            items.append({
                "title": title.text.strip(),
                "date": date.text.strip() if date else "",
                "url": link.get("href", "") if link else "",
                "source": name,
            })
        # Change detection: hash the normalized item list so any edit
        # (title, date, link) flips the fingerprint.
        current_hash = hashlib.md5(
            json.dumps(items, sort_keys=True).encode()
        ).hexdigest()
        previous_hash = self.state.get(name, {}).get("hash", "")
        is_changed = current_hash != previous_hash
        new_items = []
        if is_changed and previous_hash:
            # Report only additions, keyed by title.
            prev_titles = {
                i["title"] for i in self.state.get(name, {}).get("items", [])
            }
            new_items = [i for i in items if i["title"] not in prev_titles]
        self.state[name] = {
            "hash": current_hash,
            "items": items,
            "last_checked": str(datetime.now()),
        }
        self._save_state()
        return {"changed": is_changed, "new_items": new_items, "total": len(items)}

    def monitor_federal_register(self):
        """Check the Federal Register current-documents listing."""
        return self.check_regulatory_page(
            "Federal Register",
            "https://www.federalregister.gov/documents/current",
            {"container": ".document-wrapper", "title": "h5 a", "date": ".metadata .date"}
        )

    def monitor_sec(self):
        """Check the SEC proposed-rules listing."""
        return self.check_regulatory_page(
            "SEC",
            "https://www.sec.gov/rules/proposed.shtml",
            {"container": ".views-row", "title": ".views-field-title", "date": ".date-display-single"}
        )

    def monitor_eu_regulations(self):
        """Check the EUR-Lex act search results."""
        return self.check_regulatory_page(
            "EUR-Lex",
            "https://eur-lex.europa.eu/search.html?type=act",
            {"container": ".SearchResult", "title": ".title", "date": ".date"}
        )

    def track_document_changes(self, name, url):
        """Diff the full text of a single document against the last run.

        Returns the added lines of a unified diff (lines starting with
        "+", excluding the "+++" header); empty on first run or when the
        text is unchanged. Updates the stored snapshot either way.
        """
        resp = self.fetch(url)
        soup = BeautifulSoup(resp.text, "html.parser")
        content = soup.get_text(strip=True)
        prev_content = self.state.get(f"doc_{name}", {}).get("content", "")
        if prev_content and content != prev_content:
            diff = list(difflib.unified_diff(
                prev_content.splitlines(),
                content.splitlines(),
                lineterm=""
            ))
            changes = [l for l in diff if l.startswith("+") and not l.startswith("+++")]
        else:
            changes = []
        self.state[f"doc_{name}"] = {
            "content": content,
            "last_checked": str(datetime.now())
        }
        self._save_state()
        return changes
# Usage: run every configured monitor once and print a one-line summary
# per source, plus up to three newly detected items.
monitor = RegulatoryMonitor("YOUR_SCRAPERAPI_KEY")
sources = [
    ("Federal Register", monitor.monitor_federal_register),
    ("SEC", monitor.monitor_sec),
    ("EU Regulations", monitor.monitor_eu_regulations),
]
for source_name, run_check in sources:
    outcome = run_check()
    status = "CHANGED" if outcome["changed"] else "No changes"
    print(f"{source_name}: {status} ({outcome['total']} items)")
    # Slicing an empty list iterates zero times, so no guard is needed.
    for item in outcome["new_items"][:3]:
        print(f"  NEW: {item['title']}")
Keyword Filtering for Your Industry
def filter_relevant_changes(monitor, industry_keywords):
    """Run the configured monitors and keep only new items whose title
    mentions one of *industry_keywords* (case-insensitive substring match).

    Each matched item gains a ``"matched_keywords"`` list before being
    returned.
    """
    relevant = []
    checks = {
        "Federal Register": monitor.monitor_federal_register,
        "SEC": monitor.monitor_sec,
    }
    for source_name, run_check in checks.items():
        outcome = run_check()
        for item in outcome.get("new_items", []):
            haystack = item["title"].lower()
            hits = [kw for kw in industry_keywords if kw.lower() in haystack]
            if not hits:
                continue
            item["matched_keywords"] = hits
            relevant.append(item)
    return relevant
# Industry-specific watch list — extend to match your compliance scope.
keywords = [
    "data privacy",
    "cybersecurity",
    "AI",
    "machine learning",
    "consumer protection",
    "financial reporting",
]
relevant = filter_relevant_changes(monitor, keywords)
Proxy Requirements
Government sites often have strict rate limiting. ScraperAPI handles these reliably. For monitoring international regulators, ThorData provides country-specific IPs. Track monitoring uptime with ScrapeOps.
Conclusion
Automated regulatory monitoring transforms compliance from reactive firefighting to proactive management. Start with the regulators most relevant to your industry, add keyword filters, and build alerting on top. The cost of missing a regulatory change far exceeds the cost of building this system.
Top comments (0)