Building interactive dashboards and reports has never been easier. Let's explore three powerful Python libraries that are revolutionizing data visualization.
Introduction
In today's data-driven world, the ability to create compelling, interactive visualizations is crucial for data scientists, analysts, and developers. While traditional plotting libraries like Matplotlib and Seaborn are excellent for static visualizations, modern applications demand interactive dashboards and real-time reporting capabilities.
This article explores three powerful Python frameworks that excel at creating web-based data visualization applications: Streamlit, Dash, and Bokeh. Each tool has its unique strengths and use cases, and we'll build practical examples to demonstrate their capabilities.
Streamlit: Simplicity Meets Power
Streamlit has gained massive popularity for its incredibly simple approach to building data apps. With just a few lines of Python code, you can create interactive web applications without any web development knowledge.
Key Features:
- Zero HTML, CSS, or JavaScript required
- Automatic reactivity and caching
- Built-in widgets and components
- Easy deployment options
- Strong community and ecosystem
Streamlit Example: Sales Dashboard
Let's build a comprehensive sales dashboard that demonstrates Streamlit's capabilities:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import random
# Set page configuration
st.set_page_config(
    page_title="Sales Analytics Dashboard",
    # page_icon has to be a real emoji (or an image); the previous value was a
    # mis-encoded character, which cannot be rendered as a favicon.
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Generate sample data
@st.cache_data
def load_data():
    """Build the synthetic sales table that feeds the dashboard.

    One row is produced per (day, region, product) for every day from
    2023-01-01 through 2024-12-31. NumPy's global RNG is seeded with 42 first,
    so the generated numbers are the same on every call.

    Returns:
        pandas.DataFrame with the columns ``date``, ``region``, ``product``,
        ``sales``, ``profit`` and ``units_sold``.
    """
    region_names = ['North America', 'Europe', 'Asia', 'South America']
    product_names = ['Product A', 'Product B', 'Product C', 'Product D']

    def make_row(day, region, product):
        # The three draws must happen in exactly this order to reproduce
        # the same random sequence.
        raw_sales = np.random.normal(1000, 200)
        margin = np.random.uniform(0.1, 0.3)
        unit_price = np.random.uniform(50, 150)
        return {
            'date': day,
            'region': region,
            'product': product,
            # Clamp so a rare negative draw never shows up as negative revenue.
            'sales': max(0, raw_sales),
            'profit': max(0, raw_sales * margin),
            'units_sold': max(1, int(raw_sales / unit_price)),
        }

    np.random.seed(42)
    calendar = pd.date_range(start='2023-01-01', end='2024-12-31', freq='D')
    records = [
        make_row(day, region, product)
        for day in calendar
        for region in region_names
        for product in product_names
    ]
    return pd.DataFrame(records)
# Load data
df = load_data()

# Sidebar filters
st.sidebar.header("Dashboard Filters")

# Date range filter
first_day = df['date'].min().date()
last_day = df['date'].max().date()
date_range = st.sidebar.date_input(
    "Select Date Range",
    value=[first_day, last_day],
    min_value=first_day,
    max_value=last_day,
)

# While the user is still picking a range the widget returns only the start
# date, so indexing date_range[1] would raise IndexError. Pause the script
# until both ends are chosen.
if len(date_range) != 2:
    st.info("Select both a start and an end date to update the dashboard.")
    st.stop()
start_date, end_date = date_range

# Region filter
regions = st.sidebar.multiselect(
    "Select Regions",
    options=df['region'].unique(),
    default=df['region'].unique(),
)

# Product filter
products = st.sidebar.multiselect(
    "Select Products",
    options=df['product'].unique(),
    default=df['product'].unique(),
)

# Filter data: keep rows inside the date window that match the selected
# regions and products.
row_dates = df['date'].dt.date
filtered_df = df[
    (row_dates >= start_date) &
    (row_dates <= end_date) &
    (df['region'].isin(regions)) &
    (df['product'].isin(products))
]
# Main dashboard
st.title("๐ข Sales Analytics Dashboard")
st.markdown("---")

# Key metrics: compute every aggregate first, then render one card per column.
total_sales = filtered_df['sales'].sum()
total_profit = filtered_df['profit'].sum()
total_units = filtered_df['units_sold'].sum()
# Guard both ratios so an empty selection yields 0 instead of dividing by zero.
profit_margin = (total_profit / total_sales) * 100 if total_sales > 0 else 0
avg_order_value = total_sales / total_units if total_units > 0 else 0

col1, col2, col3, col4 = st.columns(4)
col1.metric(
    label="Total Sales",
    value=f"${total_sales:,.0f}",
    delta=f"{total_sales/1000000:.1f}M",
)
col2.metric(
    label="Total Profit",
    value=f"${total_profit:,.0f}",
    delta=f"{profit_margin:.1f}% margin",
)
col3.metric(
    label="Units Sold",
    value=f"{total_units:,}",
    delta=f"{len(filtered_df['product'].unique())} products",
)
col4.metric(
    label="Avg Order Value",
    value=f"${avg_order_value:.2f}",
    delta="Per unit",
)
st.markdown("---")

# Charts: daily trend (left) and regional split (right)
col1, col2 = st.columns(2)

col1.subheader("๐ Sales Trend Over Time")
# Aggregate data by date
sales_by_day = filtered_df.groupby('date')['sales'].sum()
daily_sales = sales_by_day.reset_index()
fig_trend = px.line(daily_sales, x='date', y='sales', title="Daily Sales Performance")
fig_trend.update_layout(
    xaxis_title="Date",
    yaxis_title="Sales ($)",
    hovermode='x unified',
)
col1.plotly_chart(fig_trend, use_container_width=True)

col2.subheader("๐ Sales by Region")
sales_by_region = filtered_df.groupby('region')['sales'].sum()
region_sales = sales_by_region.reset_index()
fig_pie = px.pie(
    region_sales,
    values='sales',
    names='region',
    title="Regional Sales Distribution",
)
col2.plotly_chart(fig_pie, use_container_width=True)

# Product performance: bar height is sales, bar colour is profit
st.subheader("๐ฆ Product Performance Analysis")
product_totals = {'sales': 'sum', 'profit': 'sum', 'units_sold': 'sum'}
product_metrics = filtered_df.groupby('product').agg(product_totals).reset_index()
fig_bar = px.bar(
    product_metrics,
    x='product',
    y='sales',
    color='profit',
    title="Sales and Profit by Product",
)
fig_bar.update_layout(
    xaxis_title="Product",
    yaxis_title="Sales ($)",
    coloraxis_colorbar_title="Profit ($)",
)
st.plotly_chart(fig_bar, use_container_width=True)
# Detailed data table
st.subheader("๐ Detailed Data View")
show_raw = st.checkbox("Show raw data")
if show_raw:
    # Only the first 1000 filtered rows are shown.
    st.dataframe(filtered_df.head(1000), use_container_width=True)

# Export functionality
st.markdown("---")
st.subheader("๐พ Export Data")
csv = filtered_df.to_csv(index=False)
export_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
st.download_button(
    label="Download filtered data as CSV",
    data=csv,
    file_name=f"sales_data_{export_stamp}.csv",
    mime="text/csv",
)

# Footer
st.markdown("---")
footer_html = """
<div style='text-align: center'>
<p>Built with โค๏ธ using Streamlit | Data refreshed daily</p>
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)
Requirements.txt for Streamlit:
streamlit==1.28.0
pandas==2.0.3
numpy==1.24.3
plotly==5.15.0
Dash: The Enterprise Solution
Plotly Dash is perfect for building production-ready analytical web applications. It provides more control over layout and styling compared to Streamlit, making it ideal for enterprise dashboards.
Key Features:
- Highly customizable with HTML/CSS components
- Excellent callback system for interactivity
- Built on React.js for robust performance
- Extensive theming and styling options
- Strong integration with Plotly charts
Dash Example: Financial Portfolio Tracker
import dash
from dash import dcc, html, Input, Output, dash_table
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import yfinance as yf
# Initialize the Dash app
app = dash.Dash(__name__)
app.title = "Portfolio Analytics Dashboard"
# Expose the underlying Flask server so a WSGI runner can import it; the
# Heroku Procfile in the deployment section runs `gunicorn app:server`.
server = app.server

# Sample portfolio data: ticker -> number of shares and price paid per share
PORTFOLIO = {
    'AAPL': {'shares': 100, 'purchase_price': 150},
    'GOOGL': {'shares': 50, 'purchase_price': 2500},
    'MSFT': {'shares': 75, 'purchase_price': 300},
    'AMZN': {'shares': 25, 'purchase_price': 3200},
    'TSLA': {'shares': 30, 'purchase_price': 800},
}
# Fetch real stock data
@app.callback(
    Output('portfolio-content', 'children'),
    Input('refresh-button', 'n_clicks')
)
def update_portfolio(n_clicks):
    """Download one year of prices and rebuild the dashboard body.

    Args:
        n_clicks: Click count of the refresh button. The value itself is not
            used; it only acts as the callback trigger.

    Returns:
        list: Children for ``portfolio-content`` - the summary cards, the
        price-history and allocation charts, and the holdings table.
    """
    table_columns = [
        'Symbol', 'Shares', 'Purchase Price', 'Current Price',
        'Current Value', 'Gain/Loss', 'Gain/Loss %',
    ]
    symbols = list(PORTFOLIO)
    stock_data = yf.download(symbols, period="1y", group_by='ticker')

    def closing_prices(symbol):
        """Return the symbol's non-missing closing prices, or None if absent."""
        try:
            closes = stock_data[symbol]['Close'].dropna()
        except KeyError:
            # The ticker (or its 'Close' column) is missing from the download.
            return None
        return None if closes.empty else closes

    portfolio_value = 0
    portfolio_data = []
    allocation_data = []
    gain_loss_styles = []
    fig_performance = go.Figure()

    # A single pass fills the table rows, the allocation slices and the
    # price-history traces, so every view agrees on which symbols have data.
    for symbol, details in PORTFOLIO.items():
        shares = details['shares']
        purchase_price = details['purchase_price']
        closes = closing_prices(symbol)

        if closes is None:
            portfolio_data.append({
                'Symbol': symbol,
                'Shares': shares,
                'Purchase Price': f"${purchase_price:.2f}",
                'Current Price': "N/A",
                'Current Value': "N/A",
                'Gain/Loss': "N/A",
                'Gain/Loss %': "N/A",
            })
            continue

        current_price = float(closes.iloc[-1])
        current_value = current_price * shares
        purchase_value = purchase_price * shares
        gain_loss = current_value - purchase_value
        # Avoid dividing by zero for a holding recorded with zero cost.
        gain_loss_pct = (gain_loss / purchase_value) * 100 if purchase_value else 0.0
        portfolio_value += current_value

        portfolio_data.append({
            'Symbol': symbol,
            'Shares': shares,
            'Purchase Price': f"${purchase_price:.2f}",
            'Current Price': f"${current_price:.2f}",
            'Current Value': f"${current_value:.2f}",
            'Gain/Loss': f"${gain_loss:.2f}",
            'Gain/Loss %': f"{gain_loss_pct:.2f}%",
        })
        allocation_data.append({'Symbol': symbol, 'Value': current_value})

        # The table cells hold formatted strings, so a numeric filter such as
        # "{Gain/Loss %} > 0" cannot match them. Decide the colour here, where
        # the number is still available, and target the row by its symbol.
        if gain_loss != 0:
            losing = gain_loss < 0
            gain_loss_styles.append({
                'if': {
                    'filter_query': f'{{Symbol}} = "{symbol}"',
                    'column_id': 'Gain/Loss %',
                },
                'backgroundColor': '#ffebee' if losing else '#e8f5e8',
                'color': 'red' if losing else 'green',
            })

        fig_performance.add_trace(go.Scatter(
            x=closes.index,
            y=closes.values,
            mode='lines',
            name=symbol,
            hovertemplate=(
                f'<b>{symbol}</b><br>'
                'Date: %{x}<br>'
                'Price: $%{y:.2f}<extra></extra>'
            ),
        ))

    fig_performance.update_layout(
        title="Stock Price Performance (1 Year)",
        xaxis_title="Date",
        yaxis_title="Price ($)",
        hovermode='x unified',
        template="plotly_white",
    )

    # Portfolio allocation pie chart
    if allocation_data:
        fig_allocation = px.pie(
            pd.DataFrame(allocation_data),
            values='Value',
            names='Symbol',
            title="Portfolio Allocation",
        )
    else:
        fig_allocation = go.Figure()
        fig_allocation.add_annotation(
            text="No data available",
            showarrow=False,
            font=dict(size=20),
        )

    return [
        # Portfolio summary cards
        html.Div([
            html.Div([
                html.H3(f"${portfolio_value:.2f}", className="metric-value"),
                html.P("Total Portfolio Value", className="metric-label")
            ], className="metric-card"),
            html.Div([
                html.H3(f"{len(PORTFOLIO)}", className="metric-value"),
                html.P("Holdings", className="metric-label")
            ], className="metric-card"),
            html.Div([
                html.H3("Active", className="metric-value"),
                html.P("Portfolio Status", className="metric-label")
            ], className="metric-card")
        ], className="metrics-container"),
        # Charts
        html.Div([
            html.Div([
                dcc.Graph(figure=fig_performance)
            ], className="chart-container"),
            html.Div([
                dcc.Graph(figure=fig_allocation)
            ], className="chart-container")
        ], className="charts-row"),
        # Portfolio table
        html.Div([
            html.H3("Portfolio Holdings"),
            dash_table.DataTable(
                data=portfolio_data,
                # Fixed column list: does not depend on portfolio_data[0] existing.
                columns=[{"name": name, "id": name} for name in table_columns],
                style_cell={'textAlign': 'left'},
                style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'},
                style_data_conditional=gain_loss_styles,
            )
        ], className="table-container")
    ]
# App layout: header bar, callback-filled content area, footer
_header = html.Div(
    children=[
        html.H1("๐ Portfolio Analytics Dashboard", className="main-title"),
        html.Button("Refresh Data", id="refresh-button", className="refresh-btn"),
    ],
    className="header",
)
# Empty placeholder; the update_portfolio callback writes its children.
_content = html.Div(id="portfolio-content")
_footer = html.Footer(
    children=[
        html.P("Built with Plotly Dash | Real-time market data via Yahoo Finance"),
    ],
    className="footer",
)
app.layout = html.Div([_header, _content, _footer])
# CSS styling
# Custom HTML page template assigned to the app. The {%...%} tokens are
# placeholders inside the template string; the inline <style> block defines the
# CSS classes that the layout and the callback output reference via className
# (header, main-title, refresh-btn, metrics-container, metric-card,
# metric-value, metric-label, charts-row, chart-container, table-container,
# footer).
app.index_string = '''
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>{%title%}</title>
{%favicon%}
{%css%}
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 0;
background-color: #f5f5f5;
}
.header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 2rem;
display: flex;
justify-content: space-between;
align-items: center;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.main-title {
margin: 0;
font-size: 2.5rem;
font-weight: 300;
}
.refresh-btn {
background-color: white;
color: #667eea;
border: none;
padding: 0.75rem 2rem;
border-radius: 25px;
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: all 0.3s ease;
}
.refresh-btn:hover {
transform: translateY(-2px);
box-shadow: 0 4px 15px rgba(0,0,0,0.2);
}
.metrics-container {
display: flex;
justify-content: space-around;
margin: 2rem;
gap: 1rem;
}
.metric-card {
background: white;
padding: 2rem;
border-radius: 15px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
text-align: center;
flex: 1;
transition: transform 0.3s ease;
}
.metric-card:hover {
transform: translateY(-5px);
}
.metric-value {
font-size: 2.5rem;
font-weight: 700;
margin: 0;
color: #333;
}
.metric-label {
color: #666;
margin: 0.5rem 0 0 0;
font-size: 1rem;
}
.charts-row {
display: flex;
margin: 2rem;
gap: 1rem;
}
.chart-container {
background: white;
padding: 1rem;
border-radius: 15px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
flex: 1;
}
.table-container {
margin: 2rem;
background: white;
padding: 2rem;
border-radius: 15px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
.footer {
background-color: #333;
color: white;
text-align: center;
padding: 2rem;
margin-top: 3rem;
}
</style>
</head>
<body>
{%app_entry%}
<footer>
{%config%}
{%scripts%}
{%renderer%}
</footer>
</body>
</html>
'''
if __name__ == '__main__':
    import os

    # Debug mode enables an interactive in-browser debugger, which must never
    # be reachable from other machines. Debug is therefore opt-in through
    # DASH_DEBUG=1 and, when enabled, the server listens on localhost only.
    debug = os.environ.get('DASH_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run_server(
        debug=debug,
        host='127.0.0.1' if debug else '0.0.0.0',
        port=8050,
    )
Requirements.txt for Dash:
dash==2.14.0
plotly==5.15.0
pandas==2.0.3
numpy==1.24.3
yfinance==0.2.18
Bokeh: Interactive Web Visualizations
Bokeh excels at creating highly interactive, publication-ready visualizations for web browsers. It's particularly powerful for large datasets and complex interactions.
Key Features:
- High-performance visualization of large datasets
- Flexible and powerful interaction capabilities
- Server applications for real-time data
- Beautiful, publication-ready output
- Extensive widget library
Bokeh Example: Real-time Data Monitor
from bokeh.plotting import figure, curdoc
from bokeh.layouts import column, row, layout
from bokeh.models import ColumnDataSource, Select, Slider, Button, Div
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.io import show
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import asyncio
from functools import partial
# Generate initial data
def generate_data(n_points=100):
    """Generate sample IoT sensor data.

    Seeds NumPy's global RNG with 42, so repeated calls with the same
    ``n_points`` return identical readings.

    Args:
        n_points: Number of samples per sensor, spaced 30 seconds apart and
            starting one hour before the current time.

    Returns:
        tuple: ``(timestamps, sensors)`` where ``timestamps`` is a pandas
        DatetimeIndex of length ``n_points`` and ``sensors`` maps each sensor
        name to a dict with ``values`` (NumPy array), ``unit`` and ``color``.
    """
    np.random.seed(42)
    timestamps = pd.date_range(
        start=datetime.now() - timedelta(hours=1),
        periods=n_points,
        # Lower-case alias: the upper-case 'S' spelling is deprecated in
        # newer pandas releases.
        freq='30s',
    )

    def wave(baseline, amplitude, half_turns, noise_sd):
        """Sine wave around *baseline* plus Gaussian noise."""
        phase = np.linspace(0, half_turns * np.pi, n_points)
        return baseline + amplitude * np.sin(phase) + np.random.normal(0, noise_sd, n_points)

    # Simulate different sensor types. The entries are evaluated top to
    # bottom, which fixes the order of the random draws.
    sensors = {
        'temperature': {
            'values': wave(20, 5, 4, 1),
            'unit': '°C',
            'color': '#ff6b6b',
        },
        'humidity': {
            'values': wave(50, 20, 2, 3),
            'unit': '%',
            'color': '#4ecdc4',
        },
        'pressure': {
            'values': wave(1013, 10, 6, 2),
            'unit': 'hPa',
            'color': '#45b7d1',
        },
        'air_quality': {
            # An air-quality index cannot be negative.
            'values': np.maximum(0, wave(100, 50, 3, 10)),
            'unit': 'AQI',
            'color': '#96ceb4',
        },
    }
    return timestamps, sensors
# Initialize data
timestamps, sensors = generate_data()

# Create data sources: one ColumnDataSource per sensor, all on the same time axis
sources = {
    name: ColumnDataSource(data=dict(x=timestamps, y=reading['values']))
    for name, reading in sensors.items()
}
# Create main plot: every sensor overlaid on one shared time axis
main_plot = figure(
    title="Real-time Sensor Monitoring Dashboard",
    x_axis_label="Time",
    y_axis_label="Sensor Values",
    x_axis_type="datetime",
    width=800,
    height=400,
    tools="pan,wheel_zoom,box_zoom,reset,save",
)

# Add lines for each sensor, keeping the renderers by sensor name
lines = {
    name: main_plot.line(
        'x', 'y',
        source=sources[name],
        legend_label=f"{name.title()} ({info['unit']})",
        line_color=info['color'],
        line_width=2,
        alpha=0.8,
    )
    for name, info in sensors.items()
}

legend = main_plot.legend
legend.location = "top_left"
legend.click_policy = "hide"
# Create individual sensor plots: one small line-plus-markers chart per sensor
sensor_plots = {}
for sensor_name, sensor_data in sensors.items():
    axis_title = f"{sensor_name.title()} ({sensor_data['unit']})"
    p = figure(
        title=f"{sensor_name.title()} Monitor",
        x_axis_label="Time",
        y_axis_label=axis_title,
        x_axis_type="datetime",
        width=350,
        height=250,
        tools="pan,wheel_zoom,box_zoom,reset",
    )
    p.line(
        'x', 'y',
        source=sources[sensor_name],
        line_color=sensor_data['color'],
        line_width=3,
    )
    # scatter() is the forward-compatible way to draw fixed-size markers;
    # circle(size=...) is deprecated in newer Bokeh releases.
    p.scatter(
        'x', 'y',
        source=sources[sensor_name],
        marker="circle",
        color=sensor_data['color'],
        size=4,
    )
    sensor_plots[sensor_name] = p
# Create controls
focus_options = [(name, name.title()) for name in sensors]
sensor_select = Select(title="Focus Sensor:", value="temperature", options=focus_options)

# Refresh period in whole seconds, from 1 to 10
update_interval = Slider(
    title="Update Interval (seconds):",
    start=1,
    end=10,
    step=1,
    value=2,
)

start_button = Button(label="Start Monitoring", button_type="success")
stop_button = Button(label="Stop Monitoring", button_type="danger")

# Statistics display; update_stats() overwrites this placeholder text
stats_div = Div(text="<h3>Sensor Statistics</h3>")
def update_stats():
    """Rebuild the statistics table and write it into ``stats_div``.

    For every sensor that has at least one reading in its data source, one
    row shows the latest, minimum, maximum and mean value with its unit.
    """
    cell_style = "border: 1px solid #ddd; padding: 8px;"

    def cell(tag, text):
        return f"<{tag} style='{cell_style}'>{text}</{tag}>"

    headings = ("Sensor", "Current", "Min", "Max", "Avg")
    fragments = [
        "<h3>๐ Live Sensor Statistics</h3><table style='width:100%; border-collapse: collapse;'>",
        "<tr style='background-color: #f0f0f0;'>",
        "".join(cell("th", heading) for heading in headings),
        "</tr>",
    ]

    for sensor_name, sensor_data in sensors.items():
        readings = sources[sensor_name].data['y']
        if len(readings) == 0:
            continue
        unit = sensor_data['unit']
        figures = (
            readings[-1],
            min(readings),
            max(readings),
            sum(readings) / len(readings),
        )
        fragments.append("<tr>")
        fragments.append(cell("td", sensor_name.title()))
        fragments.extend(cell("td", f"{number:.1f} {unit}") for number in figures)
        fragments.append("</tr>")

    fragments.append("</table>")
    stats_div.text = "".join(fragments)
# Data update function
_update_tick = 0  # number of updates so far; drives the slow oscillating trend


def update_data():
    """Simulate real-time data updates.

    Appends one new sample, 30 seconds after the last timestamp, to every
    sensor, keeps only the most recent 100 points, pushes the result into the
    Bokeh data sources and refreshes the statistics table.
    """
    global timestamps, _update_tick
    max_points = 100
    _update_tick += 1

    # Generate new data point; trim once here rather than once per sensor.
    new_time = timestamps[-1] + timedelta(seconds=30)
    timestamps = timestamps.append(pd.Index([new_time]))[-max_points:]

    # The trend is driven by a running counter. Using the window length
    # instead would give a constant, because the window always holds
    # max_points samples, and a constant trend is a steady drift.
    trend = np.sin(_update_tick * 0.1) * 0.5

    for sensor_data in sensors.values():
        # Simulate sensor reading with some noise and trend
        values = sensor_data['values']
        noise = np.random.normal(0, 1)
        new_value = values[-1] + noise + trend
        # Keep only the last max_points readings
        sensor_data['values'] = np.append(values, new_value)[-max_points:]

    # Update data sources
    for sensor_name, sensor_data in sensors.items():
        sources[sensor_name].data = dict(
            x=timestamps,
            y=sensor_data['values'],
        )
    update_stats()
# Control callbacks
monitoring_active = False
# Handle returned by add_periodic_callback. remove_periodic_callback needs
# this object, not the update function itself.
_periodic_callback = None


def start_monitoring():
    """Start the periodic data refresh; does nothing if it is already running.

    The period is read from the ``update_interval`` slider at the moment
    monitoring starts.
    """
    global monitoring_active, _periodic_callback
    if _periodic_callback is not None:
        # Already running: a second registration would double the update rate.
        return
    _periodic_callback = curdoc().add_periodic_callback(
        update_data, update_interval.value * 1000
    )
    monitoring_active = True


def stop_monitoring():
    """Stop the periodic data refresh; does nothing if it is not running."""
    global monitoring_active, _periodic_callback
    if _periodic_callback is None:
        return
    curdoc().remove_periodic_callback(_periodic_callback)
    _periodic_callback = None
    monitoring_active = False


start_button.on_click(lambda: start_monitoring())
stop_button.on_click(lambda: stop_monitoring())
# Sensor selection callback
def update_focus(attr, old, new):
    """Placeholder handler for the focus-sensor dropdown; currently a no-op.

    Args:
        attr: Name of the changed property (unused).
        old: Previous value (unused).
        new: New value (unused).
    """
    # Hook for plot highlighting or other focus features.
    focused_sensor = sensor_select.value
    return None


sensor_select.on_change('value', update_focus)
# Initialize statistics
update_stats()

# Layout: control column on the left, 2x2 grid of per-sensor plots on the right
control_widgets = [
    Div(text="<h2>๐ง Dashboard Controls</h2>"),
    sensor_select,
    update_interval,
    row(start_button, stop_button),
    stats_div,
]
controls = column(*control_widgets)

grid_names = [
    ['temperature', 'humidity'],
    ['pressure', 'air_quality'],
]
sensor_grid = layout([[sensor_plots[name] for name in names] for names in grid_names])

# Main layout
page_title = Div(text="<h1>๐ IoT Sensor Monitoring Dashboard</h1>")
dashboard_layout = layout([
    [page_title],
    [main_plot],
    [controls, sensor_grid],
])

# Add to document
document = curdoc()
document.add_root(dashboard_layout)
document.title = "IoT Dashboard"

# Auto-start monitoring
start_monitoring()
Requirements.txt for Bokeh:
bokeh==3.2.0
pandas==2.0.3
numpy==1.24.3
Deployment Guide
Streamlit Deployment (Streamlit Cloud)
- Push your code to GitHub
- Go to share.streamlit.io
- Connect your GitHub repo
- Deploy with one click!
Dash Deployment (Heroku)
# Create Procfile
# NOTE: `app:server` tells gunicorn to import the object named `server` from
# app.py, so app.py has to define it (for a Dash app: `server = app.server`).
echo "web: gunicorn app:server" > Procfile
# Add to requirements.txt
echo "gunicorn==20.1.0" >> requirements.txt
# Deploy to Heroku
heroku create your-dash-app
git push heroku main
Bokeh Deployment (Bokeh Server)
# Run locally
bokeh serve dashboard.py --show
# Deploy to cloud server
# --address selects the interface to bind to (bokeh serve has no --host option).
# List the exact public host:port browsers will use instead of "*", which
# would accept websocket connections from any origin.
bokeh serve dashboard.py --address 0.0.0.0 --port 5006 --allow-websocket-origin=your-domain.example:5006
Best Practices
For All Tools:
- Optimize Data Loading: Use caching mechanisms
- Responsive Design: Ensure mobile compatibility
- Error Handling: Implement graceful failure modes
- User Experience: Provide loading indicators and feedback
- Security: Validate inputs and sanitize data
Performance Tips:
- Use data sampling for large datasets
- Implement lazy loading for heavy computations
- Cache frequently accessed data
- Use vectorized operations with NumPy/Pandas
- Optimize plot rendering by reducing data points when necessary
Real-World Applications
Streamlit Success Stories:
- Data Science Prototyping: Quickly validate ML models
- Business Intelligence: Executive dashboards for KPI monitoring
- Educational Tools: Interactive learning applications
- Research Presentations: Academic data visualization
Dash in Production:
- Financial Trading Platforms: Real-time market analysis
- Manufacturing Dashboards: Production line monitoring
- Healthcare Analytics: Patient data visualization
- Supply Chain Management: Logistics optimization tools
Bokeh Applications:
- Scientific Computing: Large-scale data analysis
- Geospatial Analysis: Interactive mapping applications
- Network Visualization: Complex relationship mapping
- Time Series Analysis: High-frequency data monitoring
Code Repository
All the examples from this article are available on GitHub:
# Clone the repository
git clone https://github.com/yourusername/dataviz-tools-demo.git
cd dataviz-tools-demo
# Streamlit example
cd streamlit-dashboard
pip install -r requirements.txt
streamlit run app.py
# Dash example
cd ../dash-portfolio
pip install -r requirements.txt
python app.py
# Bokeh example
cd ../bokeh-monitor
pip install -r requirements.txt
bokeh serve dashboard.py --show
GitHub Actions for Automated Deployment
Here's a sample GitHub Actions workflow for automated deployment:
# .github/workflows/deploy.yml
name: Deploy Dashboard
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
jobs:
  deploy-streamlit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install dependencies
        run: |
          pip install -r streamlit-dashboard/requirements.txt
      # Smoke test: start the app headless and check that it answers HTTP.
      - name: Test Streamlit app
        run: |
          cd streamlit-dashboard
          streamlit run app.py --server.headless true &
          sleep 10
          curl -f http://localhost:8501 || exit 1
      - name: Deploy to Streamlit Cloud
        # Pull requests are only tested; deployment happens on pushes to main.
        if: github.event_name == 'push'
        # Streamlit Cloud auto-deploys from GitHub
        run: echo "Deployed to Streamlit Cloud"
  deploy-dash:
    # Never deploy unmerged pull-request code to the live Heroku app.
    if: github.event_name == 'push'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Deploy to Heroku
        uses: akhileshns/heroku-deploy@v3.12.12
        with:
          heroku_api_key: ${{secrets.HEROKU_API_KEY}}
          heroku_app_name: "your-dash-app"
          heroku_email: "your-email@example.com"
          appdir: "dash-portfolio"
Advanced Features and Extensions
Streamlit Advanced Components:
# Custom components example
import streamlit.components.v1 as components
# Embed custom HTML/JS
widget_markup = """
<div id="custom-widget">
<script>
// Custom interactive widget
function updateData() {
// Your custom JavaScript here
}
</script>
</div>
"""
components.html(widget_markup, height=400)

# File uploader with processing: show the uploaded CSV as a table
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
file_was_uploaded = uploaded_file is not None
if file_was_uploaded:
    df = pd.read_csv(uploaded_file)
    st.dataframe(df)
Dash Advanced Patterns:
# Multi-page app structure
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
# Navigation: (label, href) pairs rendered as links in the top bar
nav_targets = [
    ("Dashboard", "/dashboard"),
    ("Analytics", "/analytics"),
    ("Settings", "/settings"),
]
navbar = dbc.NavbarSimple(
    children=[
        dbc.NavItem(dbc.NavLink(label, href=href))
        for label, href in nav_targets
    ],
    brand="My Dashboard",
    brand_href="/",
    color="primary",
    dark=True,
)
# URL routing
@app.callback(Output('page-content', 'children'),
              Input('url', 'pathname'))
def display_page(pathname):
    """Return the layout that matches the current URL path.

    Args:
        pathname: Path component of the current URL; may be ``None`` before
            the location component has reported a value.

    Returns:
        The dashboard layout for ``/dashboard`` and for the site root (the
        navbar brand links to ``/``), the analytics layout for
        ``/analytics``, and a "404: Not found" page for anything else.
    """
    # NOTE(review): the navbar also links to /settings, but no settings layout
    # is visible here, so that path still falls through to the 404 page.
    if pathname in (None, '/', '/dashboard'):
        return dashboard_layout
    if pathname == '/analytics':
        return analytics_layout
    return html.Div([
        html.H1('404: Not found', className='text-danger'),
        html.Hr(),
        html.P(f'The pathname {pathname} was not recognised...')
    ])
Bokeh Server Applications:
# Advanced server app with WebSocket
from bokeh.server.server import Server
from tornado import gen
def modify_doc(doc):
    """Populate *doc* with the application's content (placeholder, does nothing)."""
    # Your Bokeh app logic here
    return None
# Custom server with additional routes
def create_server():
    """Build a Bokeh ``Server`` that serves ``modify_doc`` at ``/dashboard``.

    Returns:
        The configured server on port 5006, accepting websocket connections
        from ``localhost:5006`` only. The server is not started here.
    """
    applications = {'/dashboard': modify_doc}
    allowed_origins = ["localhost:5006"]
    return Server(
        applications,
        port=5006,
        allow_websocket_origin=allowed_origins,
    )
if __name__ == '__main__':
    # Build the server, start it, then block until shutdown is requested.
    bokeh_server = create_server()
    bokeh_server.start()
    bokeh_server.run_until_shutdown()
Testing and Quality Assurance
Unit Testing for Dashboard Applications:
# test_dashboard.py
import pytest
import pandas as pd
from unittest.mock import patch
import streamlit as st
def test_data_processing():
    """Check that processing keeps all ten rows and adds ``processed_value``."""
    # Mock data: ten consecutive days carrying the values 0..9
    sample_dates = pd.date_range('2023-01-01', periods=10)
    test_data = pd.DataFrame({'date': sample_dates, 'value': range(10)})

    # Test your processing functions
    processed = process_data(test_data)

    assert len(processed) == 10
    assert 'processed_value' in processed.columns
def test_streamlit_app():
    """Check that running the app calls ``streamlit.write`` at least once."""
    write_patch = patch('streamlit.write')
    with write_patch as fake_write:
        # Test your app logic
        main_app()
        fake_write.assert_called()
Performance Testing:
# performance_test.py
import time
import pandas as pd
from memory_profiler import profile
@profile
def test_large_dataset_performance():
    """Test performance with large datasets.

    Runs the processing function on a 100,000-row frame and asserts that it
    finishes in under five seconds with a non-empty result.
    """
    # Generate large dataset
    row_count = 100000
    large_df = pd.DataFrame({
        'x': range(row_count),
        'y': range(row_count),
    })

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    # Test your processing
    result = your_processing_function(large_df)
    execution_time = time.perf_counter() - start_time

    # Assert performance requirements
    assert execution_time < 5.0  # Should complete in under 5 seconds
    assert len(result) > 0
Security Considerations
Data Protection:
# Secure data handling
import os
from cryptography.fernet import Fernet
class SecureDataHandler:
    """Encrypt and decrypt text with a Fernet key.

    The key is read from the ``ENCRYPTION_KEY`` environment variable. When
    the variable is not set, a fresh key is generated for this instance and a
    warning is logged, because anything encrypted with that key cannot be
    decrypted by another instance or after a restart.
    """

    def __init__(self):
        import logging

        # Use environment variables for keys. A key is generated only when
        # none is configured, and the fallback is never silent.
        key = os.environ.get('ENCRYPTION_KEY')
        if key is None:
            logging.getLogger(__name__).warning(
                "ENCRYPTION_KEY is not set; using a temporary key. Data "
                "encrypted now cannot be decrypted after a restart."
            )
            key = Fernet.generate_key()
        self.cipher_suite = Fernet(key)

    def encrypt_data(self, data):
        """Return the encrypted token (bytes) for the string *data*."""
        return self.cipher_suite.encrypt(data.encode())

    def decrypt_data(self, encrypted_data):
        """Return the original string for a token from ``encrypt_data``."""
        return self.cipher_suite.decrypt(encrypted_data).decode()
# Input validation
def validate_input(user_input):
    """Validate and sanitize user inputs.

    Args:
        user_input: The text to clean.

    Returns:
        str: ``user_input`` with every ``<``, ``>``, ``"`` and ``'`` removed,
        truncated to at most 100 characters.

    Raises:
        ValueError: If ``user_input`` is not a string.
    """
    if not isinstance(user_input, str):
        raise ValueError("Input must be string")
    # Remove potentially dangerous characters. str.translate needs no extra
    # import; the snippet never imported the regex module it used to call.
    # NOTE: stripping characters is not a substitute for escaping text when
    # it is rendered into HTML.
    safe_input = user_input.translate(str.maketrans('', '', '<>"\''))
    return safe_input[:100]  # Limit length
Monitoring and Analytics
Application Monitoring:
# monitoring.py
import logging
import time
from functools import wraps
# Set up logging: INFO and above goes to dashboard.log and to the console
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
log_handlers = [
    logging.FileHandler('dashboard.log'),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=log_format, handlers=log_handlers)
logger = logging.getLogger(__name__)
def monitor_performance(func):
    """Decorator to monitor function performance.

    Logs the wrapped function's run time at INFO level when it succeeds.
    When it raises, logs the error together with the traceback and re-raises
    the same exception.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same signature and metadata as ``func``.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so durations are not affected by system
        # clock adjustments.
        start_time = time.perf_counter()
        try:
            result = func(*args, **kwargs)
        except Exception as e:
            # logger.exception records the traceback as well as the message.
            logger.exception("Error in %s: %s", func.__name__, e)
            raise
        execution_time = time.perf_counter() - start_time
        logger.info("%s executed in %.2fs", func.__name__, execution_time)
        return result
    return wrapper
# Usage tracking
class UsageTracker:
    """In-memory log of user interactions with simple summary statistics."""

    def __init__(self):
        # Recorded events, oldest first.
        self.interactions = []

    def track_interaction(self, action, user_id=None):
        """Record *action* with the current time and an optional *user_id*."""
        event = dict(timestamp=time.time(), action=action, user_id=user_id)
        self.interactions.append(event)

    def get_usage_stats(self):
        """Return the total number of events and the number of distinct actions."""
        distinct_actions = {event['action'] for event in self.interactions}
        return {
            'total_interactions': len(self.interactions),
            'unique_actions': len(distinct_actions),
        }
Conclusion
The landscape of data visualization tools in Python offers something for every use case. Streamlit excels in rapid prototyping and simplicity, making it perfect for data scientists who want to quickly share their work. Dash provides the robustness and customization needed for production applications, while Bokeh offers unmatched flexibility for complex, interactive visualizations.
Top comments (0)