Cities publish crime data through open data portals in hundreds of different formats. Build a unified dashboard that aggregates and visualizes crime trends across municipalities.
The Problem with Municipal Data
Every city publishes crime data differently — Chicago, NYC, and LA all expose Socrata-style JSON APIs, but each uses its own dataset schema, field names, and date formats. A unified scraper normalizes everything into one comparable format.
Scraping City Crime Portals
import requests
import pandas as pd
API_KEY = "YOUR_SCRAPERAPI_KEY" # Get one at https://www.scraperapi.com?fp_ref=the52
def scrape_chicago_crimes(limit=1000):
    """Fetch recent crime reports from Chicago's open-data (Socrata) API.

    Args:
        limit: Maximum number of records to request, newest first.

    Returns:
        pandas.DataFrame with the normalized columns: city, type, date,
        latitude, longitude, location.

    Raises:
        requests.HTTPError: if the portal returns an HTTP error status.
    """
    url = (
        "https://data.cityofchicago.org/resource/ijzp-q8t2.json"
        f"?$limit={limit}&$order=date DESC"
    )
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    records = [
        {
            'city': 'Chicago',
            'type': crime.get('primary_type', ''),
            'date': crime.get('date', ''),
            'latitude': crime.get('latitude'),
            'longitude': crime.get('longitude'),
            'location': crime.get('block', ''),
        }
        for crime in response.json()
    ]
    return pd.DataFrame(records)
def scrape_nyc_crimes(limit=1000):
    """Fetch recent NYPD complaint records from NYC's open-data (Socrata) API.

    Args:
        limit: Maximum number of records to request, newest first.

    Returns:
        pandas.DataFrame with the normalized columns: city, type, date,
        latitude, longitude, location.

    Raises:
        requests.HTTPError: if the portal returns an HTTP error status.
    """
    url = (
        "https://data.cityofnewyork.us/resource/5uac-w243.json"
        f"?$limit={limit}&$order=cmplnt_fr_dt DESC"
    )
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    records = [
        {
            'city': 'NYC',
            'type': crime.get('ofns_desc', ''),
            'date': crime.get('cmplnt_fr_dt', ''),
            'latitude': crime.get('latitude'),
            'longitude': crime.get('longitude'),
            'location': crime.get('boro_nm', ''),
        }
        for crime in response.json()
    ]
    return pd.DataFrame(records)
# Fetch both city feeds and merge them into one normalized DataFrame.
# NOTE(review): this performs network I/O at module import time — consider
# moving it behind an `if __name__ == '__main__':` guard or a main().
chicago = scrape_chicago_crimes()
nyc = scrape_nyc_crimes()
combined = pd.concat([chicago, nyc], ignore_index=True)
print(f"Total records: {len(combined)}")
Adding More Cities
def scrape_la_crimes(limit=1000):
    """Fetch recent crime reports from LA's open-data (Socrata) API.

    Args:
        limit: Maximum number of records to request, newest first.
            Added for consistency with the other city scrapers; the
            default preserves the previous hard-coded behavior.

    Returns:
        pandas.DataFrame with the normalized columns: city, type, date,
        latitude, longitude, location.

    Raises:
        requests.HTTPError: if the portal returns an HTTP error status.
    """
    url = (
        "https://data.lacity.org/resource/2nrs-mtv8.json"
        f"?$limit={limit}&$order=date_occ DESC"
    )
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    records = [
        {
            'city': 'Los Angeles',
            'type': crime.get('crm_cd_desc', ''),
            'date': crime.get('date_occ', ''),
            'latitude': crime.get('lat'),
            'longitude': crime.get('lon'),
            'location': crime.get('area_name', ''),
        }
        for crime in response.json()
    ]
    return pd.DataFrame(records)
# Append the LA feed to the existing combined DataFrame (network I/O at import time).
la = scrape_la_crimes()
combined = pd.concat([combined, la], ignore_index=True)
Analyzing Crime Patterns
# --- Monthly trend analysis -------------------------------------------
import matplotlib.pyplot as plt

# Parse the heterogeneous per-city date strings; unparseable values become
# NaT and are dropped by the groupby below (NaN group keys are excluded).
combined['date_parsed'] = pd.to_datetime(combined['date'], errors='coerce')
combined['month'] = combined['date_parsed'].dt.to_period('M')

# Count reports per (city, month) pair.
monthly = combined.groupby(['city', 'month']).size().reset_index(name='count')

plt.figure(figsize=(10, 6))  # fresh figure so earlier plots/reruns don't overlay
for city in monthly['city'].unique():
    city_data = monthly[monthly['city'] == city]
    plt.plot(city_data['month'].astype(str), city_data['count'], label=city)
plt.legend()
plt.title('Monthly Crime Reports by City')
plt.xlabel('Month')
plt.ylabel('Reported incidents')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('crime_trends.png', dpi=150)
plt.close()  # release the figure's memory after writing the PNG
Building the Flask Dashboard
Serve the aggregated results through a lightweight Flask app:
# --- Minimal Flask dashboard over the aggregated `combined` DataFrame ---
from flask import Flask, render_template_string
app = Flask(__name__)
@app.route('/')
def dashboard():
    """Render per-city record totals and the most common offence type."""
    def most_frequent(values):
        # Modal value of the group; 'N/A' guards the empty-group case.
        return values.mode().iloc[0] if len(values) > 0 else 'N/A'

    per_city = combined.groupby('city').agg(
        total=('type', 'count'),
        most_common=('type', most_frequent),
    )
    stats = per_city.to_dict('index')
    template = '''
<h1>Crime Data Dashboard</h1>
{% for city, data in stats.items() %}
<h2>{{ city }}</h2>
<p>Total: {{ data.total }} | Most common: {{ data.most_common }}</p>
{% endfor %}
'''
    return render_template_string(template, stats=stats)
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger —
    # fine for local development, never for a deployed instance.
    app.run(debug=True, port=5000)
Applications
- Neighborhood safety scores for real estate
- Trend detection for local news organizations
- Resource allocation insights for city planners
- Most city portals use Socrata's SODA API — build once, adapt easily
- A headless browser or a commercial scraping API can handle the few JS-heavy portal pages
Top comments (0)