DEV Community

Sergio Alberto Colque Ponce
Sergio Alberto Colque Ponce

Posted on

Modern Data Visualization Tools: A Comprehensive Guide to Streamlit, Dash, and Bokeh

Building interactive dashboards and reports has never been easier. Let's explore three powerful Python libraries that are revolutionizing data visualization.

Introduction

In today's data-driven world, the ability to create compelling, interactive visualizations is crucial for data scientists, analysts, and developers. While traditional plotting libraries like Matplotlib and Seaborn are excellent for static visualizations, modern applications demand interactive dashboards and real-time reporting capabilities.
This article explores three powerful Python frameworks that excel at creating web-based data visualization applications: Streamlit, Dash, and Bokeh. Each tool has its unique strengths and use cases, and we'll build practical examples to demonstrate their capabilities.

Streamlit: Simplicity Meets Power
Streamlit has gained massive popularity for its incredibly simple approach to building data apps. With just a few lines of Python code, you can create interactive web applications without any web development knowledge.

Key Features:

  • Zero HTML, CSS, or JavaScript required
  • Automatic reactivity and caching
  • Built-in widgets and components
  • Easy deployment options
  • Strong community and ecosystem

Streamlit Example: Sales Dashboard
Let's build a comprehensive sales dashboard that demonstrates Streamlit's capabilities:

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import random

# Configure the browser tab (title, icon) and use the wide layout with the
# sidebar expanded on first load.
st.set_page_config(
    page_title="Sales Analytics Dashboard",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Generate sample data
@st.cache_data
def load_data():
    """Build a synthetic daily sales table.

    One row is produced for every combination of calendar day (2023-01-01
    through 2024-12-31), region and product. The global NumPy generator is
    seeded with 42 first, so repeated calls yield the same numbers.

    Returns:
        A DataFrame with the columns ``date``, ``region``, ``product``,
        ``sales``, ``profit`` and ``units_sold``.
    """
    np.random.seed(42)
    calendar = pd.date_range(start='2023-01-01', end='2024-12-31', freq='D')
    region_names = ('North America', 'Europe', 'Asia', 'South America')
    product_names = ('Product A', 'Product B', 'Product C', 'Product D')

    def make_row(day, region_name, product_name):
        # The draw order (sales, margin, unit price) fixes the generated values.
        amount = np.random.normal(1000, 200)
        margin = np.random.uniform(0.1, 0.3)
        unit_price = np.random.uniform(50, 150)
        return {
            'date': day,
            'region': region_name,
            'product': product_name,
            'sales': max(0, amount),
            'profit': max(0, amount * margin),
            'units_sold': max(1, int(amount / unit_price)),
        }

    rows = [
        make_row(day, region_name, product_name)
        for day in calendar
        for region_name in region_names
        for product_name in product_names
    ]
    return pd.DataFrame(rows)

# Load data
df = load_data()

# Sidebar filters
st.sidebar.header("Dashboard Filters")

# Date range filter, bounded by the first and last day present in the data
first_day = df['date'].min().date()
last_day = df['date'].max().date()
date_range = st.sidebar.date_input(
    "Select Date Range",
    value=[first_day, last_day],
    min_value=first_day,
    max_value=last_day
)

# While the user has picked only the first date of the range, the widget
# returns fewer than two dates. Pause this run instead of indexing a missing
# end date.
if len(date_range) != 2:
    st.sidebar.info("Select both a start and an end date.")
    st.stop()
start_date, end_date = date_range

# Region filter
regions = st.sidebar.multiselect(
    "Select Regions",
    options=df['region'].unique(),
    default=df['region'].unique()
)

# Product filter
products = st.sidebar.multiselect(
    "Select Products",
    options=df['product'].unique(),
    default=df['product'].unique()
)

# Keep only rows inside the chosen date window, regions and products
filtered_df = df[
    (df['date'].dt.date >= start_date) &
    (df['date'].dt.date <= end_date) &
    (df['region'].isin(regions)) &
    (df['product'].isin(products))
]

# Main dashboard
st.title("🏢 Sales Analytics Dashboard")
st.markdown("---")

# Key metrics: four headline numbers computed from the filtered rows.
# The `delta` slots carry descriptive annotations rather than changes over
# time, so they are rendered in neutral colour (delta_color="off").
col1, col2, col3, col4 = st.columns(4)

with col1:
    total_sales = filtered_df['sales'].sum()
    st.metric(
        label="Total Sales",
        value=f"${total_sales:,.0f}",
        delta=f"{total_sales/1000000:.1f}M",
        delta_color="off"
    )

with col2:
    total_profit = filtered_df['profit'].sum()
    # Guard against an empty selection, where total sales is zero
    profit_margin = (total_profit / total_sales) * 100 if total_sales > 0 else 0
    st.metric(
        label="Total Profit",
        value=f"${total_profit:,.0f}",
        delta=f"{profit_margin:.1f}% margin",
        delta_color="off"
    )

with col3:
    total_units = filtered_df['units_sold'].sum()
    st.metric(
        label="Units Sold",
        value=f"{total_units:,}",
        delta=f"{filtered_df['product'].nunique()} products",
        delta_color="off"
    )

with col4:
    # Sales divided by units sold, i.e. average revenue per unit
    avg_order_value = total_sales / total_units if total_units > 0 else 0
    st.metric(
        label="Avg Order Value",
        value=f"${avg_order_value:.2f}",
        delta="Per unit",
        delta_color="off"
    )

st.markdown("---")

# Charts: sales trend and regional split side by side
col1, col2 = st.columns(2)

with col1:
    st.subheader("📈 Sales Trend Over Time")

    # Aggregate data by date
    daily_sales = filtered_df.groupby('date')['sales'].sum().reset_index()

    fig_trend = px.line(
        daily_sales,
        x='date',
        y='sales',
        title="Daily Sales Performance"
    )
    fig_trend.update_layout(
        xaxis_title="Date",
        yaxis_title="Sales ($)",
        hovermode='x unified'
    )
    st.plotly_chart(fig_trend, use_container_width=True)

with col2:
    st.subheader("🌍 Sales by Region")

    region_sales = filtered_df.groupby('region')['sales'].sum().reset_index()

    fig_pie = px.pie(
        region_sales,
        values='sales',
        names='region',
        title="Regional Sales Distribution"
    )
    st.plotly_chart(fig_pie, use_container_width=True)

# Product performance: bar height is sales, bar colour is profit
st.subheader("📦 Product Performance Analysis")

product_metrics = filtered_df.groupby('product').agg({
    'sales': 'sum',
    'profit': 'sum',
    'units_sold': 'sum'
}).reset_index()

fig_bar = px.bar(
    product_metrics,
    x='product',
    y='sales',
    color='profit',
    title="Sales and Profit by Product"
)
fig_bar.update_layout(
    xaxis_title="Product",
    yaxis_title="Sales ($)",
    coloraxis_colorbar_title="Profit ($)"
)
st.plotly_chart(fig_bar, use_container_width=True)

# Detailed data table (opt-in, capped at the first 1000 filtered rows)
st.subheader("📊 Detailed Data View")

if st.checkbox("Show raw data"):
    st.dataframe(
        filtered_df.head(1000),
        use_container_width=True
    )

# Export functionality: the full filtered selection as CSV, with a
# timestamped file name
st.markdown("---")
st.subheader("💾 Export Data")

csv = filtered_df.to_csv(index=False)
st.download_button(
    label="Download filtered data as CSV",
    data=csv,
    file_name=f"sales_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
    mime="text/csv"
)

# Footer
st.markdown("---")
st.markdown(
    """
    <div style='text-align: center'>
        <p>Built with ❤️ using Streamlit | Data refreshed daily</p>
    </div>
    """,
    unsafe_allow_html=True
)
Enter fullscreen mode Exit fullscreen mode

Requirements.txt for Streamlit:

streamlit==1.28.0
pandas==2.0.3
numpy==1.24.3
plotly==5.15.0
Enter fullscreen mode Exit fullscreen mode

Dash: The Enterprise Solution
Plotly Dash is perfect for building production-ready analytical web applications. It provides more control over layout and styling compared to Streamlit, making it ideal for enterprise dashboards.

Key Features:

  • Highly customizable with HTML/CSS components
  • Excellent callback system for interactivity
  • Built on React.js for robust performance
  • Extensive theming and styling options
  • Strong integration with Plotly charts

Dash Example: Financial Portfolio Tracker

import dash
from dash import dcc, html, Input, Output, dash_table
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import yfinance as yf

# Initialize the Dash app
app = dash.Dash(__name__)
app.title = "Portfolio Analytics Dashboard"
# Module-level handle on the app's underlying server object, so a WSGI
# launcher can import it (e.g. `gunicorn app:server`).
server = app.server

# Sample portfolio data: shares held and per-share purchase price by ticker
PORTFOLIO = {
    'AAPL': {'shares': 100, 'purchase_price': 150},
    'GOOGL': {'shares': 50, 'purchase_price': 2500},
    'MSFT': {'shares': 75, 'purchase_price': 300},
    'AMZN': {'shares': 25, 'purchase_price': 3200},
    'TSLA': {'shares': 30, 'purchase_price': 800}
}

# Column ids of the holdings table, in display order. Defined up front so the
# table header never depends on the first data row existing.
_HOLDINGS_COLUMNS = [
    'Symbol', 'Shares', 'Purchase Price', 'Current Price',
    'Current Value', 'Gain/Loss', 'Gain/Loss %'
]


def _closing_prices(stock_data, symbol):
    """Return the non-missing closing prices for *symbol*, or None if there are none."""
    try:
        closes = stock_data[symbol]['Close'].dropna()
    except (KeyError, IndexError, TypeError):
        # The symbol is absent from the downloaded frame.
        return None
    return None if closes.empty else closes


# Fetch real stock data
@app.callback(
    Output('portfolio-content', 'children'),
    Input('refresh-button', 'n_clicks')
)
def update_portfolio(n_clicks):
    """Rebuild the dashboard body from one year of downloaded prices.

    Args:
        n_clicks: Click count of the refresh button. Its value is not used;
            it only triggers the callback.

    Returns:
        A list of three components: the summary cards, the row with the
        price-history and allocation charts, and the holdings table.
        Holdings without usable price data are shown as "N/A" and are left
        out of the total value and both charts.
    """
    symbols = list(PORTFOLIO.keys())
    stock_data = yf.download(symbols, period="1y", group_by='ticker')

    # Resolve each symbol's price history once; the table and both charts
    # reuse it.
    price_history = {
        symbol: _closing_prices(stock_data, symbol) for symbol in symbols
    }

    # Calculate portfolio metrics
    portfolio_value = 0
    portfolio_data = []
    allocation_data = []
    gain_styles = []

    for row_index, (symbol, details) in enumerate(PORTFOLIO.items()):
        shares = details['shares']
        purchase_price = details['purchase_price']
        closes = price_history[symbol]

        if closes is None:
            # No price available: keep the holding visible but unvalued
            portfolio_data.append({
                'Symbol': symbol,
                'Shares': shares,
                'Purchase Price': f"${purchase_price:.2f}",
                'Current Price': "N/A",
                'Current Value': "N/A",
                'Gain/Loss': "N/A",
                'Gain/Loss %': "N/A"
            })
            continue

        current_price = float(closes.iloc[-1])
        current_value = current_price * shares
        purchase_value = purchase_price * shares
        gain_loss = current_value - purchase_value
        # A zero cost basis has no meaningful percentage
        if purchase_value:
            gain_loss_pct_text = f"{(gain_loss / purchase_value) * 100:.2f}%"
        else:
            gain_loss_pct_text = "N/A"

        portfolio_value += current_value
        allocation_data.append({'Symbol': symbol, 'Value': current_value})

        portfolio_data.append({
            'Symbol': symbol,
            'Shares': shares,
            'Purchase Price': f"${purchase_price:.2f}",
            'Current Price': f"${current_price:.2f}",
            'Current Value': f"${current_value:.2f}",
            'Gain/Loss': f"${gain_loss:.2f}",
            'Gain/Loss %': gain_loss_pct_text
        })

        # The table cells hold formatted strings, so the colouring is decided
        # here from the numeric gain and attached to the row by index. The
        # table has no sorting or filtering enabled, so row order matches
        # `portfolio_data`.
        if gain_loss != 0:
            is_loss = gain_loss < 0
            gain_styles.append({
                'if': {'row_index': row_index, 'column_id': 'Gain/Loss %'},
                'backgroundColor': '#ffebee' if is_loss else '#e8f5e8',
                'color': 'red' if is_loss else 'green',
            })

    # Create portfolio performance chart: one line per symbol with prices
    fig_performance = go.Figure()

    for symbol in symbols:
        closes = price_history[symbol]
        if closes is None:
            continue
        fig_performance.add_trace(go.Scatter(
            x=closes.index,
            y=closes.values,
            mode='lines',
            name=symbol,
            hovertemplate=f'<b>{symbol}</b><br>' +
                         'Date: %{x}<br>' +
                         'Price: $%{y:.2f}<extra></extra>'
        ))

    fig_performance.update_layout(
        title="Stock Price Performance (1 Year)",
        xaxis_title="Date",
        yaxis_title="Price ($)",
        hovermode='x unified',
        template="plotly_white"
    )

    # Portfolio allocation pie chart, or a placeholder when nothing is priced
    if allocation_data:
        allocation_df = pd.DataFrame(allocation_data)
        fig_allocation = px.pie(
            allocation_df,
            values='Value',
            names='Symbol',
            title="Portfolio Allocation"
        )
    else:
        fig_allocation = go.Figure()
        fig_allocation.add_annotation(text="No data available",
                                      showarrow=False,
                                      font=dict(size=20))

    return [
        # Portfolio summary cards
        html.Div([
            html.Div([
                html.H3(f"${portfolio_value:.2f}", className="metric-value"),
                html.P("Total Portfolio Value", className="metric-label")
            ], className="metric-card"),

            html.Div([
                html.H3(f"{len(PORTFOLIO)}", className="metric-value"),
                html.P("Holdings", className="metric-label")
            ], className="metric-card"),

            html.Div([
                html.H3("Active", className="metric-value"),
                html.P("Portfolio Status", className="metric-label")
            ], className="metric-card")
        ], className="metrics-container"),

        # Charts
        html.Div([
            html.Div([
                dcc.Graph(figure=fig_performance)
            ], className="chart-container"),

            html.Div([
                dcc.Graph(figure=fig_allocation)
            ], className="chart-container")
        ], className="charts-row"),

        # Portfolio table
        html.Div([
            html.H3("Portfolio Holdings"),
            dash_table.DataTable(
                data=portfolio_data,
                columns=[{"name": name, "id": name} for name in _HOLDINGS_COLUMNS],
                style_cell={'textAlign': 'left'},
                style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'},
                style_data_conditional=gain_styles
            )
        ], className="table-container")
    ]

# App layout: header with the refresh button, an empty container that the
# 'portfolio-content' callback output fills, and a footer.
app.layout = html.Div([
    html.Div([
        html.H1("📈 Portfolio Analytics Dashboard", className="main-title"),
        html.Button("Refresh Data", id="refresh-button", className="refresh-btn")
    ], className="header"),

    html.Div(id="portfolio-content"),

    html.Footer([
        html.P("Built with Plotly Dash | Real-time market data via Yahoo Finance")
    ], className="footer")
])

# CSS styling
# Custom HTML page template with an inline stylesheet. The CSS classes defined
# here (header, main-title, refresh-btn, metrics-container, metric-card,
# metric-value, metric-label, charts-row, chart-container, table-container,
# footer) are the className values used by the layout and the portfolio
# callback. The {%...%} tokens are template placeholders and are kept verbatim.
app.index_string = '''
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>{%title%}</title>
        {%favicon%}
        {%css%}
        <style>
            body {
                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                margin: 0;
                padding: 0;
                background-color: #f5f5f5;
            }
            .header {
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                padding: 2rem;
                display: flex;
                justify-content: space-between;
                align-items: center;
                box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            }
            .main-title {
                margin: 0;
                font-size: 2.5rem;
                font-weight: 300;
            }
            .refresh-btn {
                background-color: white;
                color: #667eea;
                border: none;
                padding: 0.75rem 2rem;
                border-radius: 25px;
                font-size: 1rem;
                font-weight: 600;
                cursor: pointer;
                transition: all 0.3s ease;
            }
            .refresh-btn:hover {
                transform: translateY(-2px);
                box-shadow: 0 4px 15px rgba(0,0,0,0.2);
            }
            .metrics-container {
                display: flex;
                justify-content: space-around;
                margin: 2rem;
                gap: 1rem;
            }
            .metric-card {
                background: white;
                padding: 2rem;
                border-radius: 15px;
                box-shadow: 0 4px 15px rgba(0,0,0,0.1);
                text-align: center;
                flex: 1;
                transition: transform 0.3s ease;
            }
            .metric-card:hover {
                transform: translateY(-5px);
            }
            .metric-value {
                font-size: 2.5rem;
                font-weight: 700;
                margin: 0;
                color: #333;
            }
            .metric-label {
                color: #666;
                margin: 0.5rem 0 0 0;
                font-size: 1rem;
            }
            .charts-row {
                display: flex;
                margin: 2rem;
                gap: 1rem;
            }
            .chart-container {
                background: white;
                padding: 1rem;
                border-radius: 15px;
                box-shadow: 0 4px 15px rgba(0,0,0,0.1);
                flex: 1;
            }
            .table-container {
                margin: 2rem;
                background: white;
                padding: 2rem;
                border-radius: 15px;
                box-shadow: 0 4px 15px rgba(0,0,0,0.1);
            }
            .footer {
                background-color: #333;
                color: white;
                text-align: center;
                padding: 2rem;
                margin-top: 3rem;
            }
        </style>
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
'''

if __name__ == '__main__':
    import os

    # Debug mode serves an interactive debugger, so it is opt-in (set
    # DASH_DEBUG=1) and then bound to localhost only. Without it the app
    # listens on all interfaces as before.
    debug_enabled = os.environ.get('DASH_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run_server(
        debug=debug_enabled,
        host='127.0.0.1' if debug_enabled else '0.0.0.0',
        port=8050
    )
Enter fullscreen mode Exit fullscreen mode

Requirements.txt for Dash:

dash==2.14.0
plotly==5.15.0
pandas==2.0.3
numpy==1.24.3
yfinance==0.2.18
Enter fullscreen mode Exit fullscreen mode

Bokeh: Interactive Web Visualizations

Bokeh excels at creating highly interactive, publication-ready visualizations for web browsers. It's particularly powerful for large datasets and complex interactions.

Key Features:

  • High-performance visualization of large datasets
  • Flexible and powerful interaction capabilities
  • Server applications for real-time data
  • Beautiful, publication-ready output
  • Extensive widget library

Bokeh Example: Real-time Data Monitor

from bokeh.plotting import figure, curdoc
from bokeh.layouts import column, row, layout
from bokeh.models import ColumnDataSource, Select, Slider, Button, Div
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.io import show
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import asyncio
from functools import partial

# Generate initial data
def generate_data(n_points=100):
    """Generate sample IoT sensor data.

    Args:
        n_points: Number of samples per sensor. Samples are 30 seconds apart
            and the first one is stamped one hour before the current time.

    Returns:
        A ``(timestamps, sensors)`` tuple. ``timestamps`` is a DatetimeIndex
        of length ``n_points``; ``sensors`` maps each sensor name to a dict
        holding its ``values`` array, display ``unit`` and line ``color``.
    """
    # Fixed seed: every call produces the same series
    np.random.seed(42)
    timestamps = pd.date_range(
        start=datetime.now() - timedelta(hours=1),
        periods=n_points,
        # A timedelta step is accepted by every pandas version, unlike the
        # letter aliases whose spelling has changed between releases.
        freq=timedelta(seconds=30)
    )

    # Simulate different sensor types: a sine wave around a baseline plus
    # Gaussian noise. Air quality is additionally clamped at zero.
    sensors = {
        'temperature': {
            'values': 20 + 5 * np.sin(np.linspace(0, 4*np.pi, n_points)) + np.random.normal(0, 1, n_points),
            'unit': '°C',
            'color': '#ff6b6b'
        },
        'humidity': {
            'values': 50 + 20 * np.sin(np.linspace(0, 2*np.pi, n_points)) + np.random.normal(0, 3, n_points),
            'unit': '%',
            'color': '#4ecdc4'
        },
        'pressure': {
            'values': 1013 + 10 * np.sin(np.linspace(0, 6*np.pi, n_points)) + np.random.normal(0, 2, n_points),
            'unit': 'hPa',
            'color': '#45b7d1'
        },
        'air_quality': {
            'values': np.maximum(0, 100 + 50 * np.sin(np.linspace(0, 3*np.pi, n_points)) + np.random.normal(0, 10, n_points)),
            'unit': 'AQI',
            'color': '#96ceb4'
        }
    }

    return timestamps, sensors

# Initialize data
timestamps, sensors = generate_data()

# Create data sources: one per sensor, pairing the shared time axis with that
# sensor's readings
sources = {
    sensor_name: ColumnDataSource(data=dict(x=timestamps, y=reading['values']))
    for sensor_name, reading in sensors.items()
}

# Create main plot: all sensors overlaid on one datetime axis
main_plot = figure(
    title="Real-time Sensor Monitoring Dashboard",
    x_axis_label="Time",
    y_axis_label="Sensor Values",
    x_axis_type="datetime",
    width=800,
    height=400,
    tools="pan,wheel_zoom,box_zoom,reset,save"
)

# Add lines for each sensor, keeping each renderer under its sensor name
lines = {
    sensor_name: main_plot.line(
        'x', 'y',
        source=sources[sensor_name],
        legend_label=f"{sensor_name.title()} ({sensor_data['unit']})",
        line_color=sensor_data['color'],
        line_width=2,
        alpha=0.8
    )
    for sensor_name, sensor_data in sensors.items()
}

# Legend sits top-left; clicking an entry hides that sensor's line
main_plot.legend.location = "top_left"
main_plot.legend.click_policy = "hide"

# Create individual sensor plots
def _build_sensor_plot(sensor_name, sensor_data):
    """Return a small line-plus-marker figure for one sensor, fed by its shared data source."""
    plot = figure(
        title=f"{sensor_name.title()} Monitor",
        x_axis_label="Time",
        y_axis_label=f"{sensor_name.title()} ({sensor_data['unit']})",
        x_axis_type="datetime",
        width=350,
        height=250,
        tools="pan,wheel_zoom,box_zoom,reset"
    )

    plot.line('x', 'y',
              source=sources[sensor_name],
              line_color=sensor_data['color'],
              line_width=3)

    # scatter() draws the point markers (circles by default); newer Bokeh
    # releases deprecate circle() with a screen-unit size.
    plot.scatter('x', 'y',
                 source=sources[sensor_name],
                 color=sensor_data['color'],
                 size=4)

    return plot


sensor_plots = {
    sensor_name: _build_sensor_plot(sensor_name, sensor_data)
    for sensor_name, sensor_data in sensors.items()
}

# Create controls
# Dropdown entries are (value, label) pairs: raw sensor key and its title-cased form
focus_options = [(key, key.title()) for key in sensors]
sensor_select = Select(
    title="Focus Sensor:",
    value="temperature",
    options=focus_options
)

# Refresh period in whole seconds, 1 through 10
update_interval = Slider(
    title="Update Interval (seconds):",
    value=2,
    start=1,
    end=10,
    step=1
)

start_button = Button(label="Start Monitoring", button_type="success")
stop_button = Button(label="Stop Monitoring", button_type="danger")

# Statistics display; its text is replaced by update_stats()
stats_div = Div(text="<h3>Sensor Statistics</h3>")

def update_stats():
    """Rebuild the statistics table shown in ``stats_div``.

    For every sensor whose data source holds at least one reading, the table
    lists the latest value, minimum, maximum and mean, each followed by the
    sensor's unit.
    """
    cell_style = "border: 1px solid #ddd; padding: 8px;"
    header_cells = "".join(
        f"<th style='{cell_style}'>{title}</th>"
        for title in ("Sensor", "Current", "Min", "Max", "Avg")
    )
    # Collect the fragments and join once instead of growing a string
    parts = [
        "<h3>📊 Live Sensor Statistics</h3><table style='width:100%; border-collapse: collapse;'>",
        f"<tr style='background-color: #f0f0f0;'>{header_cells}</tr>",
    ]

    for sensor_name, sensor_data in sensors.items():
        readings = sources[sensor_name].data['y']
        if len(readings) == 0:
            continue
        unit = sensor_data['unit']
        figures = (
            readings[-1],
            min(readings),
            max(readings),
            sum(readings) / len(readings),
        )
        value_cells = "".join(
            f"<td style='{cell_style}'>{value:.1f} {unit}</td>" for value in figures
        )
        parts.append(
            f"<tr><td style='{cell_style}'>{sensor_name.title()}</td>{value_cells}</tr>"
        )

    parts.append("</table>")
    stats_div.text = "".join(parts)

# Data update function
def update_data():
    """Append one simulated reading per sensor and refresh every data source.

    The new sample is stamped 30 seconds after the latest timestamp. Each
    sensor's next value is its previous value plus Gaussian noise and a small
    sinusoidal drift. Histories are capped at the latest 100 samples.
    """
    global timestamps, sensors

    # Advance the shared time axis by one step
    next_stamp = timestamps[-1] + timedelta(seconds=30)
    timestamps = timestamps.append(pd.Index([next_stamp]))

    for reading in sensors.values():
        history = reading['values']
        # Drift is computed from the history length before the new sample
        noise = np.random.normal(0, 1)
        trend = np.sin(len(history) * 0.1) * 0.5
        history = np.append(history, history[-1] + noise + trend)

        # Keep only the last 100 points
        if len(history) > 100:
            history = history[-100:]
            timestamps = timestamps[-100:]

        reading['values'] = history

    # Push the refreshed series to the plots, then redraw the statistics table
    for sensor_name, reading in sensors.items():
        sources[sensor_name].data = dict(x=timestamps, y=reading['values'])

    update_stats()

# Control callbacks
monitoring_active = False
# Handle returned by add_periodic_callback. It is this handle, not the
# function itself, that has to be passed when removing the callback.
_periodic_callback = None


def start_monitoring():
    """Start the periodic data refresh at the slider's current interval.

    Does nothing when monitoring is already running, so repeated clicks do
    not stack callbacks. A changed interval takes effect after stopping and
    starting again.
    """
    global monitoring_active, _periodic_callback
    if _periodic_callback is not None:
        return
    _periodic_callback = curdoc().add_periodic_callback(
        update_data, update_interval.value * 1000  # seconds -> milliseconds
    )
    monitoring_active = True


def stop_monitoring():
    """Stop the periodic data refresh; does nothing when it is not running."""
    global monitoring_active, _periodic_callback
    if _periodic_callback is None:
        return
    curdoc().remove_periodic_callback(_periodic_callback)
    _periodic_callback = None
    monitoring_active = False


start_button.on_click(lambda: start_monitoring())
stop_button.on_click(lambda: stop_monitoring())

# Sensor selection callback
def update_focus(attr, old, new):
    """Placeholder hook run when the focus-sensor dropdown changes.

    It reads the selected sensor but takes no action yet.
    """
    # Hook point: implement plot highlighting or other focus features here
    selected_sensor = sensor_select.value
    return None

sensor_select.on_change('value', update_focus)

# Initialize statistics
update_stats()

# Layout: control column (widgets plus statistics table)
controls = column(
    Div(text="<h2>🔧 Dashboard Controls</h2>"),
    sensor_select,
    update_interval,
    row(start_button, stop_button),
    stats_div
)

# 2x2 grid of the per-sensor plots
sensor_grid = layout([
    [sensor_plots['temperature'], sensor_plots['humidity']],
    [sensor_plots['pressure'], sensor_plots['air_quality']]
])

# Main layout: heading, combined plot, then controls beside the sensor grid
dashboard_layout = layout([
    [Div(text="<h1>🌍 IoT Sensor Monitoring Dashboard</h1>")],
    [main_plot],
    [controls, sensor_grid]
])

# Add to document
curdoc().add_root(dashboard_layout)
curdoc().title = "IoT Dashboard"

# Auto-start monitoring
start_monitoring()
Enter fullscreen mode Exit fullscreen mode

Requirements.txt for Bokeh:

bokeh==3.2.0
pandas==2.0.3
numpy==1.24.3
Enter fullscreen mode Exit fullscreen mode

Deployment Guide

Streamlit Deployment (Streamlit Cloud)

  1. Push your code to GitHub
  2. Go to share.streamlit.io
  3. Connect your GitHub repo
  4. Deploy with one click!

Dash Deployment (Heroku)

# Create Procfile: the web process runs gunicorn against `server` in module `app`
printf '%s\n' 'web: gunicorn app:server' > Procfile

# Add to requirements.txt
printf '%s\n' 'gunicorn==20.1.0' >> requirements.txt

# Deploy to Heroku: create the app, then push the main branch to it
heroku create your-dash-app
git push heroku main
Enter fullscreen mode Exit fullscreen mode

Bokeh Deployment (Bokeh Server)

# Run locally
bokeh serve dashboard.py --show

# Deploy to cloud server: listen on all interfaces, and accept websocket
# connections only from the host name the dashboard is published under
# (replace the placeholder with your own host:port).
bokeh serve dashboard.py --address 0.0.0.0 --port 5006 --allow-websocket-origin=your-domain.example.com:5006
Enter fullscreen mode Exit fullscreen mode

Best Practices

For All Tools:

  1. Optimize Data Loading: Use caching mechanisms
  2. Responsive Design: Ensure mobile compatibility
  3. Error Handling: Implement graceful failure modes
  4. User Experience: Provide loading indicators and feedback
  5. Security: Validate inputs and sanitize data

Performance Tips:

  • Use data sampling for large datasets
  • Implement lazy loading for heavy computations
  • Cache frequently accessed data
  • Use vectorized operations with NumPy/Pandas
  • Optimize plot rendering by reducing data points when necessary

Real-World Applications

Streamlit Success Stories:

  • Data Science Prototyping: Quickly validate ML models
  • Business Intelligence: Executive dashboards for KPI monitoring
  • Educational Tools: Interactive learning applications
  • Research Presentations: Academic data visualization

Dash in Production:

  • Financial Trading Platforms: Real-time market analysis
  • Manufacturing Dashboards: Production line monitoring
  • Healthcare Analytics: Patient data visualization
  • Supply Chain Management: Logistics optimization tools

Bokeh Applications:

  • Scientific Computing: Large-scale data analysis
  • Geospatial Analysis: Interactive mapping applications
  • Network Visualization: Complex relationship mapping
  • Time Series Analysis: High-frequency data monitoring

Code Repository
All the examples from this article are available on GitHub:

# Clone the repository
# NOTE(review): "yourusername" looks like a placeholder — substitute the real account.
git clone https://github.com/yourusername/dataviz-tools-demo.git
cd dataviz-tools-demo

# Streamlit example (run from the repository root)
cd streamlit-dashboard
pip install -r requirements.txt
streamlit run app.py

# Dash example (the relative path assumes you are still inside streamlit-dashboard)
cd ../dash-portfolio
pip install -r requirements.txt
python app.py

# Bokeh example (the relative path assumes you are still inside dash-portfolio)
cd ../bokeh-monitor
pip install -r requirements.txt
bokeh serve dashboard.py --show
Enter fullscreen mode Exit fullscreen mode

GitHub Actions for Automated Deployment
Here's a sample GitHub Actions workflow for automated deployment:

# .github/workflows/deploy.yml
name: Deploy Dashboard

# Pull requests run the smoke test only; deployment steps are limited to
# pushes to main via the `if:` conditions below.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  deploy-streamlit:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.9'

    - name: Install dependencies
      run: |
        pip install -r streamlit-dashboard/requirements.txt

    - name: Test Streamlit app
      # Start the app in the background, wait, then fail the job if it does not answer
      run: |
        cd streamlit-dashboard
        streamlit run app.py --server.headless true &
        sleep 10
        curl -f http://localhost:8501 || exit 1

    - name: Deploy to Streamlit Cloud
      # Streamlit Cloud auto-deploys from GitHub
      if: github.event_name == 'push'
      run: echo "Deployed to Streamlit Cloud"

  deploy-dash:
    # Never deploy code from an unmerged pull request
    if: github.event_name == 'push'
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v3

    - name: Deploy to Heroku
      uses: akhileshns/heroku-deploy@v3.12.12
      with:
        heroku_api_key: ${{secrets.HEROKU_API_KEY}}
        heroku_app_name: "your-dash-app"
        heroku_email: "your-email@example.com"
        appdir: "dash-portfolio"
Enter fullscreen mode Exit fullscreen mode

Advanced Features and Extensions
Streamlit Advanced Components:

# Custom components example
import streamlit.components.v1 as components

# Embed custom HTML/JS
# Markup for the embedded widget; the script body is a placeholder
CUSTOM_WIDGET_HTML = """
<div id="custom-widget">
    <script>
        // Custom interactive widget
        function updateData() {
            // Your custom JavaScript here
        }
    </script>
</div>
"""
components.html(CUSTOM_WIDGET_HTML, height=400)

# File uploader with processing: show the uploaded CSV as a table
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.dataframe(df)
Enter fullscreen mode Exit fullscreen mode

Dash Advanced Patterns:

# Multi-page app structure
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc

# Navigation
# (label, href) pairs for the navbar entries, in display order
nav_targets = (
    ("Dashboard", "/dashboard"),
    ("Analytics", "/analytics"),
    ("Settings", "/settings"),
)
navbar = dbc.NavbarSimple(
    children=[
        dbc.NavItem(dbc.NavLink(label, href=href))
        for label, href in nav_targets
    ],
    brand="My Dashboard",
    brand_href="/",
    color="primary",
    dark=True,
)

# URL routing
@app.callback(Output('page-content', 'children'),
              Input('url', 'pathname'))
def display_page(pathname):
    """Return the page layout for *pathname*.

    Args:
        pathname: Current URL path reported by the 'url' component.

    Returns:
        The dashboard layout for "/dashboard" and for the root path "/"
        (the navbar brand links there), the analytics layout for
        "/analytics", and a 404 message for anything else.
    """
    # NOTE(review): pathname is presumably None until the URL component has
    # reported a location — treated like the root path here; confirm.
    if pathname in (None, '/', '/dashboard'):
        return dashboard_layout
    if pathname == '/analytics':
        return analytics_layout
    # No layout is defined in this snippet for other paths (including
    # "/settings"), so they fall through to the 404 page.
    return html.Div([
        html.H1('404: Not found', className='text-danger'),
        html.Hr(),
        html.P(f'The pathname {pathname} was not recognised...')
    ])
Enter fullscreen mode Exit fullscreen mode

Bokeh Server Applications:

# Advanced server app with WebSocket
from bokeh.server.server import Server
from tornado import gen

def modify_doc(doc):
    """Placeholder application function: receives *doc* and does nothing yet."""
    # Your Bokeh app logic here
    return None

# Custom server with additional routes
def create_server():
    """Build a Server that maps /dashboard to ``modify_doc`` on port 5006.

    Websocket connections are accepted from localhost:5006 only.
    """
    applications = {'/dashboard': modify_doc}
    return Server(
        applications,
        port=5006,
        allow_websocket_origin=["localhost:5006"],
    )


if __name__ == '__main__':
    # Script entry point: build the server, start it, then run until shutdown
    server = create_server()
    server.start()
    server.run_until_shutdown()
Enter fullscreen mode Exit fullscreen mode

Testing and Quality Assurance
Unit Testing for Dashboard Applications:

# test_dashboard.py
import pytest
import pandas as pd
from unittest.mock import patch
import streamlit as st

def test_data_processing():
    """Check that process_data keeps all ten rows and adds 'processed_value'."""
    # Ten consecutive days paired with the values 0..9
    sample = pd.DataFrame({
        'date': pd.date_range('2023-01-01', periods=10),
        'value': range(10),
    })

    processed = process_data(sample)

    assert len(processed) == 10
    assert 'processed_value' in processed.columns

def test_streamlit_app():
    """Run main_app with streamlit.write patched and check it was called."""
    with patch('streamlit.write') as write_spy:
        # Exercise the app while output calls are intercepted
        main_app()
        write_spy.assert_called()
Enter fullscreen mode Exit fullscreen mode

Performance Testing:

# performance_test.py
import time
import pandas as pd
from memory_profiler import profile

@profile
def test_large_dataset_performance():
    """Check that processing 100,000 rows finishes in under five seconds.

    The function is wrapped in memory_profiler's ``profile`` decorator.
    """
    # Generate large dataset
    large_df = pd.DataFrame({
        'x': range(100000),
        'y': range(100000)
    })

    # perf_counter is a monotonic high-resolution clock, so the measured
    # duration is not distorted by system clock adjustments.
    start_time = time.perf_counter()

    # Test your processing
    result = your_processing_function(large_df)

    execution_time = time.perf_counter() - start_time

    # Assert performance requirements
    assert execution_time < 5.0  # Should complete in under 5 seconds
    assert len(result) > 0
Enter fullscreen mode Exit fullscreen mode

Security Considerations
Data Protection:

# Secure data handling
import os
from cryptography.fernet import Fernet

class SecureDataHandler:
    """Encrypt and decrypt text with a Fernet cipher.

    The key is read from the ``ENCRYPTION_KEY`` environment variable. When
    the variable is not set, a temporary key is generated and a
    RuntimeWarning is issued, because anything encrypted with that key
    becomes unreadable once this object is gone.
    """

    def __init__(self):
        import warnings

        # Use environment variables for keys
        key = os.environ.get('ENCRYPTION_KEY')
        if key is None:
            warnings.warn(
                "ENCRYPTION_KEY is not set; using a temporary key. Data "
                "encrypted with it cannot be decrypted by another instance "
                "or after a restart.",
                RuntimeWarning,
                stacklevel=2,
            )
            key = Fernet.generate_key()
        self.cipher_suite = Fernet(key)

    def encrypt_data(self, data):
        """Return the encrypted token (bytes) for the string *data*."""
        return self.cipher_suite.encrypt(data.encode())

    def decrypt_data(self, encrypted_data):
        """Return the original string for the token *encrypted_data*."""
        return self.cipher_suite.decrypt(encrypted_data).decode()

# Input validation
def validate_input(user_input):
    """Validate and sanitize user inputs.

    Args:
        user_input: The raw text to clean.

    Returns:
        *user_input* with the characters ``<``, ``>``, ``"`` and ``'``
        removed, truncated to at most 100 characters.

    Raises:
        ValueError: If *user_input* is not a string.
    """
    # Imported here so the function is self-contained
    import re

    if not isinstance(user_input, str):
        raise ValueError("Input must be string")

    # Remove potentially dangerous characters
    safe_input = re.sub(r'[<>"\']', '', user_input)
    return safe_input[:100]  # Limit length
Enter fullscreen mode Exit fullscreen mode

Monitoring and Analytics
Application Monitoring:


# monitoring.py
import logging
import time
from functools import wraps

# Set up logging: INFO and above go to dashboard.log and to the console stream
log_handlers = [
    logging.FileHandler('dashboard.log'),
    logging.StreamHandler(),
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=log_handlers,
)

logger = logging.getLogger(__name__)

def monitor_performance(func):
    """Decorator to monitor function performance.

    A successful call is logged at INFO level with its duration. A call that
    raises is logged at ERROR level with the traceback attached, and the
    exception is re-raised unchanged.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same signature and metadata as *func*.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so durations are not distorted by
        # system clock adjustments.
        start_time = time.perf_counter()
        try:
            result = func(*args, **kwargs)
        except Exception as e:
            # logger.exception keeps the traceback alongside the message
            logger.exception("Error in %s: %s", func.__name__, e)
            raise
        execution_time = time.perf_counter() - start_time
        logger.info("%s executed in %.2fs", func.__name__, execution_time)
        return result
    return wrapper

# Usage tracking
class UsageTracker:
    """In-memory log of user interactions with simple aggregate statistics."""

    def __init__(self):
        # Recorded events, oldest first
        self.interactions = []

    def track_interaction(self, action, user_id=None):
        """Record *action*, optionally tied to *user_id*, stamped with the current time."""
        event = dict(timestamp=time.time(), action=action, user_id=user_id)
        self.interactions.append(event)

    def get_usage_stats(self):
        """Return the number of recorded events and of distinct actions among them."""
        distinct_actions = {event['action'] for event in self.interactions}
        return {
            'total_interactions': len(self.interactions),
            'unique_actions': len(distinct_actions),
        }
Enter fullscreen mode Exit fullscreen mode

Conclusion
The landscape of data visualization tools in Python offers something for every use case. Streamlit excels in rapid prototyping and simplicity, making it perfect for data scientists who want to quickly share their work. Dash provides the robustness and customization needed for production applications, while Bokeh offers unmatched flexibility for complex, interactive visualizations.

GitHub Repository

Top comments (0)