Personal finance aggregators like Mint and Plaid connect to your bank accounts and show you everything in one place. While you cannot replicate Plaid's bank partnerships, you can build a personal aggregator using screen scraping and CSV imports for your own financial data.
The Architecture
Bank CSV Exports / Screen Scraping --> Normalizer --> SQLite DB --> Analytics
Setting Up
pip install requests beautifulsoup4 pandas sqlite-utils playwright
playwright install chromium
The Aggregator Framework
import sqlite_utils
import pandas as pd
from datetime import datetime
import hashlib
class FinanceAggregator:
def __init__(self, db_path: str = "finances.db"):
self.db = sqlite_utils.Database(db_path)
self._init_tables()
def _init_tables(self):
if "transactions" not in self.db.table_names():
self.db["transactions"].create({
"id": str, "date": str, "description": str,
"amount": float, "category": str,
"account": str, "source": str, "imported_at": str
}, pk="id")
def add_transactions(self, transactions: list[dict], source: str):
for tx in transactions:
tx["source"] = source
tx["imported_at"] = datetime.utcnow().isoformat()
tx["id"] = hashlib.sha256(
f"{tx['date']}_{tx['description']}_{tx['amount']}_{source}".encode()
).hexdigest()[:16]
self.db["transactions"].upsert_all(transactions, pk="id")
print(f"Imported {len(transactions)} transactions from {source}")
CSV Import for Bank Exports
Most banks let you export CSVs. Build importers for common formats:
def import_bank_csv(file_path: str, bank_format: str = "chase") -> list[dict]:
df = pd.read_csv(file_path)
format_mappings = {
"chase": {"date_col": "Transaction Date", "desc_col": "Description",
"amount_col": "Amount", "category_col": "Category"},
"bofa": {"date_col": "Date", "desc_col": "Payee",
"amount_col": "Amount", "category_col": None},
"amex": {"date_col": "Date", "desc_col": "Description",
"amount_col": "Amount", "category_col": "Category"},
}
fmt = format_mappings.get(bank_format)
if not fmt:
raise ValueError(f"Unknown format: {bank_format}")
transactions = []
for _, row in df.iterrows():
tx = {
"date": pd.to_datetime(row[fmt["date_col"]]).strftime("%Y-%m-%d"),
"description": str(row[fmt["desc_col"]]),
"amount": float(row[fmt["amount_col"]]),
"category": str(row.get(fmt["category_col"], "Uncategorized")) if fmt["category_col"] else "Uncategorized"
}
transactions.append(tx)
return transactions
Auto-Categorization
CATEGORY_RULES = {
"Groceries": ["walmart", "trader joe", "whole foods", "kroger", "costco"],
"Restaurants": ["doordash", "ubereats", "grubhub", "starbucks", "chipotle"],
"Transportation": ["uber", "lyft", "shell", "chevron", "parking"],
"Subscriptions": ["netflix", "spotify", "hulu", "adobe", "youtube"],
"Utilities": ["electric", "water", "internet", "verizon", "att"],
}
def categorize_transaction(description: str) -> str:
desc_lower = description.lower()
for category, keywords in CATEGORY_RULES.items():
if any(kw in desc_lower for kw in keywords):
return category
return "Other"
def auto_categorize(aggregator: FinanceAggregator):
uncategorized = list(aggregator.db["transactions"].rows_where(
"category = ?", ["Uncategorized"]
))
updated = 0
for tx in uncategorized:
category = categorize_transaction(tx["description"])
if category != "Other":
aggregator.db["transactions"].update(tx["id"], {"category": category})
updated += 1
print(f"Auto-categorized {updated}/{len(uncategorized)} transactions")
Spending Analytics
def monthly_report(aggregator: FinanceAggregator, year: int, month: int):
start = f"{year}-{month:02d}-01"
end = f"{year}-{month:02d}-31"
txs = list(aggregator.db["transactions"].rows_where(
"date >= ? AND date <= ? AND amount < 0", [start, end]
))
df = pd.DataFrame(txs)
if df.empty:
print("No transactions found")
return
df["amount"] = df["amount"].abs()
by_category = df.groupby("category")["amount"].agg(["sum", "count", "mean"])
by_category = by_category.sort_values("sum", ascending=False)
total = df["amount"].sum()
print(f"Monthly Report: {year}-{month:02d}")
print(f"Total spending: ${total:,.2f}")
print(f"Transactions: {len(df)}")
for cat, row in by_category.iterrows():
pct = (row['sum'] / total) * 100
print(f" {cat}: ${row['sum']:,.2f} ({pct:.1f}%)")
# Usage
agg = FinanceAggregator()
txs = import_bank_csv("chase_export.csv", "chase")
agg.add_transactions(txs, "chase_checking")
auto_categorize(agg)
monthly_report(agg, 2026, 3)
Screen Scraping for Portals Without Export
For banking portals that require JavaScript rendering, use Playwright with ScraperAPI proxy rotation for reliable access. ThorData residential proxies prevent blocks on financial sites. Monitor your pipelines with ScrapeOps.
Security Notes
- Store credentials in environment variables or a keychain, never in code
- Use encrypted SQLite or SQLCipher for the database
- Run on your local machine only, never on shared servers
- This is for YOUR OWN accounts only
Building your own finance aggregator gives you complete control over your data and analytics. No subscriptions, no data sharing with third parties.
Top comments (0)