import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from uuid import uuid4
def pie_chart_from_row(df, column_name=None):
    """
    Render a donut-style pie chart built from the first row of ``df``.

    Each column of the first row becomes one slice: the column label is the
    slice label and the cell value is the slice size. Cells that cannot be
    read as numbers are skipped, because a slice needs a numeric size.

    Args:
        df: DataFrame whose first row is plotted.
        column_name: Optional column name used as the chart title. If it is
            not a column of ``df`` a warning is shown and the default title
            "First Row Distribution" is used instead.

    Returns:
        None. The chart, or an error/warning message, is written to the
        Streamlit page; exceptions are reported with ``st.error``.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return

        if column_name and column_name not in df.columns:
            st.warning(f"Column {column_name} not found. Using default title.")
            column_name = None

        # Text or otherwise non-numeric cells become NaN and are dropped so
        # that only plottable values reach the pie trace.
        first_row = pd.to_numeric(df.iloc[0], errors="coerce").dropna()
        if first_row.empty:
            st.error("First row contains no numeric values to plot.")
            return

        labels = [str(label) for label in first_row.index]
        values = first_row.tolist()

        fig = go.Figure(data=[
            go.Pie(
                labels=labels,
                values=values,
                hole=0.3,
                textinfo='label+percent',
                marker=dict(colors=px.colors.qualitative.Pastel),
                textfont=dict(size=14),
            )
        ])
        fig.update_layout(
            title_text=column_name if column_name else "First Row Distribution",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            showlegend=True,
            template="plotly",
            margin=dict(t=60, b=60, l=60, r=60),
        )
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        # Best-effort UI helper: show the problem on the page, do not crash.
        st.error(f"Error generating pie chart: {str(e)}")
def pie_chart_from_column(df, column_name):
    """
    Draw a pie chart of how often each distinct value occurs in one column.

    Slice sizes are the occurrence counts returned by ``value_counts()`` for
    ``df[column_name]``. An unusable DataFrame, an unknown column, or any
    exception raised while plotting is reported with ``st.error`` rather than
    propagated.
    """
    try:
        usable = isinstance(df, pd.DataFrame) and not df.empty
        if not usable:
            st.error("Invalid or empty DataFrame provided.")
            return

        if column_name not in df.columns:
            st.error(f"Column {column_name} not found in DataFrame.")
            return

        # Occurrences per distinct value drive the slice sizes.
        counts = df[column_name].value_counts()

        pie_trace = go.Pie(
            labels=counts.index,
            values=counts.values,
            textinfo='label+percent',
            marker={'colors': px.colors.qualitative.Set2},
            textfont={'size': 14},
        )
        figure = go.Figure(data=[pie_trace])
        figure.update_layout(
            title_text=f"Distribution of {column_name}",
            title_x=0.5,
            title_font={'size': 20, 'family': "Arial"},
            showlegend=True,
            template="plotly",
            margin={'t': 60, 'b': 60, 'l': 60, 'r': 60},
        )
        st.plotly_chart(figure, use_container_width=True)
    except Exception as exc:
        st.error(f"Error generating column pie chart: {str(exc)}")
def metric_card(df, column_name=None):
    """
    Render a styled HTML metric card summarising a column or the whole frame.

    * ``column_name`` names a numeric column: the card shows its mean,
      formatted with two decimals.
    * ``column_name`` names a non-numeric column: the card shows the number
      of non-null values.
    * ``column_name`` is missing or not a column of ``df``: the card shows
      the row count under the title "Total Records".

    Args:
        df: Source DataFrame.
        column_name: Optional column to summarise; also used as the title.

    Returns:
        None. The card, or an error message, is written to the Streamlit page.
    """
    # Local import keeps this function self-contained; the card is emitted
    # with unsafe_allow_html=True, so interpolated text must be escaped.
    import html

    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return

        if column_name and column_name in df.columns:
            title = column_name
            series = df[column_name]
            # is_numeric_dtype covers every integer/float width and the
            # nullable dtypes, not just int64/float64. Booleans are excluded
            # so they keep being counted rather than averaged.
            is_numeric = (
                pd.api.types.is_numeric_dtype(series)
                and not pd.api.types.is_bool_dtype(series)
            )
            if is_numeric:
                value = series.mean()
                display_value = f"{float(value):.2f}"
            else:
                value = series.count()
                display_value = f"{value}"
        else:
            title = "Total Records"
            value = df.shape[0]
            display_value = f"{value}"

        safe_title = html.escape(str(title))
        safe_value = html.escape(display_value)

        st.markdown(
            f"""
            <div style='background-color: #ffffff; padding: 25px; border-radius: 12px; box-shadow: 0 6px 12px rgba(0,0,0,0.15); border: 1px solid #e0e0e0;'>
            <h3 style='text-align: center; color: #1a3c5e; margin: 0; font-family: Arial, sans-serif;'>{safe_title}</h3>
            <h2 style='text-align: center; color: #2980b9; margin: 10px 0 0 0; font-family: Arial, sans-serif;'>{safe_value}</h2>
            </div>
            """,
            unsafe_allow_html=True
        )
    except Exception as e:
        st.error(f"Error generating metric card: {str(e)}")
def problem_solution_visual(df, problem_col, solution_col):
    """
    Render a Sankey diagram linking values of ``problem_col`` to ``solution_col``.

    Rows with a null in either column are ignored. Each distinct
    (problem, solution) pair becomes one link whose width is the number of
    rows containing that pair. A value that occurs in both columns is drawn
    as a single shared node.

    Args:
        df: Source DataFrame.
        problem_col: Column providing link sources.
        solution_col: Column providing link targets.

    Returns:
        None. The diagram, or an error message, is written to the Streamlit page.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return
        if problem_col not in df.columns or solution_col not in df.columns:
            st.error(f"One or both columns ({problem_col}, {solution_col}) not found.")
            return

        df_sankey = df[[problem_col, solution_col]].dropna()
        if df_sankey.empty:
            st.error("No rows with both a problem and a solution to plot.")
            return

        # First-appearance order (instead of set order) keeps node positions
        # and colours stable from one run to the next.
        nodes = list(pd.unique(pd.concat(
            [df_sankey[problem_col], df_sankey[solution_col]],
            ignore_index=True,
        )))
        node_indices = {node: i for i, node in enumerate(nodes)}

        # One link per distinct pair, weighted by its occurrence count, so the
        # "Count" shown on hover is the real number of rows.
        pair_counts = df_sankey.groupby(
            [problem_col, solution_col], sort=False, observed=True
        ).size()
        source = [node_indices[problem] for problem, _ in pair_counts.index]
        target = [node_indices[solution] for _, solution in pair_counts.index]
        value = pair_counts.tolist()

        # Cycle through the palette so every node gets a colour even when
        # there are more nodes than palette entries.
        palette = px.colors.qualitative.D3
        node_colors = [palette[i % len(palette)] for i in range(len(nodes))]
        link_colors = []
        for s in source:
            red, green, blue = px.colors.hex_to_rgb(node_colors[s])
            link_colors.append(f"rgba({red},{green},{blue},0.3)")

        fig = go.Figure(data=[
            go.Sankey(
                node=dict(
                    pad=20,
                    thickness=30,
                    line=dict(color="black", width=0.5),
                    label=[str(node) for node in nodes],
                    color=node_colors,
                    hovertemplate='%{label}<br>Count: %{value}<extra></extra>'
                ),
                link=dict(
                    source=source,
                    target=target,
                    value=value,
                    color=link_colors,
                    hovertemplate='From: %{source.label}<br>To: %{target.label}<br>Count: %{value}<extra></extra>'
                )
            )
        ])
        fig.update_layout(
            title_text=f"{problem_col} to {solution_col} Flow",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            font=dict(size=12, family="Arial", color="#2c3e50"),
            template="plotly",
            margin=dict(t=80, b=60, l=60, r=60),
            height=600,
            annotations=[
                dict(
                    x=0, y=1.05, xref="paper", yref="paper",
                    text="Problem to Solution Flow Analysis",
                    showarrow=False,
                    font=dict(size=14, color="#1a3c5e")
                )
            ]
        )
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        st.error(f"Error generating Sankey diagram: {str(e)}")
def status_by_user_visual(df, user_col, status_col):
    """
    Render a grouped bar chart of row counts per user, one bar per status.

    The data is grouped by (``user_col``, ``status_col``); each distinct
    status becomes its own trace with a colour from the Set1 palette, cycling
    when there are more statuses than colours.

    Args:
        df: Source DataFrame.
        user_col: Column shown on the x-axis.
        status_col: Column whose distinct values become the bar groups.

    Returns:
        None. The chart, or an error message, is written to the Streamlit page.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return
        if user_col not in df.columns or status_col not in df.columns:
            st.error(f"One or both columns ({user_col}, {status_col}) not found.")
            return

        # Rows: users, columns: statuses, cells: number of matching records.
        df_grouped = df.groupby([user_col, status_col]).size().unstack(fill_value=0)

        palette = px.colors.qualitative.Set1
        fig = go.Figure()
        # enumerate gives the palette position directly instead of searching
        # the column list on every iteration.
        for position, status in enumerate(df_grouped.columns):
            fig.add_trace(go.Bar(
                x=df_grouped.index,
                y=df_grouped[status],
                name=str(status),  # legend entries are text, even for numeric/bool statuses
                marker_color=palette[position % len(palette)]
            ))

        fig.update_layout(
            title_text=f"{user_col} by {status_col}",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            xaxis_title=user_col,
            yaxis_title="Count",
            barmode='group',
            template="plotly",
            margin=dict(t=60, b=60, l=60, r=60),
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
            font=dict(family="Arial", size=12, color="#2c3e50")
        )
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        st.error(f"Error generating status bar chart: {str(e)}")
def single_column_visual(df, column_name):
    """
    Render the distribution of a single column.

    Numeric columns are drawn as a 30-bin histogram; every other column is
    drawn as a bar chart of value counts.

    Args:
        df: Source DataFrame.
        column_name: Column to visualise.

    Returns:
        None. The chart, or an error message, is written to the Streamlit page.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return
        if column_name not in df.columns:
            st.error(f"Column {column_name} not found in DataFrame.")
            return

        series = df[column_name]
        # is_numeric_dtype recognises all integer/float widths and nullable
        # dtypes, not only int64/float64; booleans stay on the bar-chart path.
        is_numeric = (
            pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series)
        )

        if is_numeric:
            fig = px.histogram(df, x=column_name, nbins=30,
                               color_discrete_sequence=px.colors.qualitative.Bold)
        else:
            value_counts = series.value_counts()
            fig = px.bar(x=value_counts.index, y=value_counts.values,
                         color_discrete_sequence=px.colors.qualitative.Bold)
            # px.bar was given raw arrays, so axis titles must be set by hand.
            fig.update_layout(xaxis_title=column_name, yaxis_title="Count")

        fig.update_layout(
            title_text=f"Distribution of {column_name}",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            template="plotly",
            margin=dict(t=60, b=60, l=60, r=60),
            font=dict(family="Arial", size=12, color="#2c3e50")
        )
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        st.error(f"Error generating single column visual: {str(e)}")
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import Optional, Union, List, Dict, Any
import logging
from functools import wraps
import numpy as np
# Configure logging
logging.basicConfig(level=logging.INFO)
# Use the module's own name so log records identify where they came from.
logger = logging.getLogger(__name__)
class DataVizError(Exception):
    """Error type used to signal data visualization problems."""
def handle_errors(func):
    """
    Decorator that turns exceptions from ``func`` into Streamlit error messages.

    The wrapped callable is invoked normally and its result returned. If it
    raises, the exception is shown with ``st.error``, written to the module
    logger, and swallowed, so the wrapper returns ``None`` in that case.
    ``DataVizError``, ``KeyError`` and ``ValueError`` get tailored messages;
    anything else is reported as an unexpected error.

    Args:
        func: Callable to protect.

    Returns:
        The wrapping callable, carrying ``func``'s metadata via ``functools.wraps``.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except DataVizError as e:
            st.error(f"❌ Visualization Error: {str(e)}")
            logger.error("DataVizError in %s: %s", func.__name__, e)
        except KeyError as e:
            st.error(f"❌ Column Error: {str(e)} not found in data")
            logger.error("KeyError in %s: %s", func.__name__, e)
        except ValueError as e:
            st.error(f"❌ Data Error: Invalid data format - {str(e)}")
            logger.error("ValueError in %s: %s", func.__name__, e)
        except Exception as e:
            st.error(f"❌ Unexpected Error: {str(e)}")
            # Include the traceback: an unexpected failure is the case where
            # the stack is actually needed for debugging.
            logger.exception("Unexpected error in %s: %s", func.__name__, e)
        return None
    return wrapper
class ModernDataVisualizer:
    """
    Streamlit/Plotly visualization helper with shared styling and validation.

    Every public ``create_*`` method validates its inputs, builds a Plotly
    figure (or an HTML card) and renders it to the current Streamlit page.
    The methods are wrapped with ``handle_errors``, so failures are shown on
    the page instead of being raised to the caller.
    """

    def __init__(self):
        # Named colour sequences used for multi-series charts.
        self.colors = {
            'primary': px.colors.qualitative.Set3,
            'secondary': px.colors.qualitative.Pastel,
            'accent': px.colors.qualitative.Bold
        }
        # Single colours used for backgrounds, text and highlights.
        self.theme = {
            'background': '#ffffff',
            'text': '#2c3e50',
            'accent': '#3498db',
            'success': '#27ae60',
            'warning': '#f39c12',
            'error': '#e74c3c'
        }

    @staticmethod
    def _is_numeric(series: pd.Series) -> bool:
        """Return True for any integer/float dtype (any width, nullable or not), excluding booleans."""
        return (
            pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series)
        )

    def _validate_dataframe(self, df: pd.DataFrame) -> None:
        """
        Validate DataFrame input.

        Raises:
            DataVizError: If ``df`` is not a DataFrame or has no rows/columns.
        """
        if not isinstance(df, pd.DataFrame):
            raise DataVizError("Input must be a pandas DataFrame")
        # ``empty`` is already True when there are zero rows or zero columns,
        # so no separate row-count check is needed.
        if df.empty:
            raise DataVizError("DataFrame is empty")

    def _validate_column(self, df: pd.DataFrame, column: str) -> None:
        """
        Validate that ``column`` exists in ``df``.

        Raises:
            DataVizError: If the column is missing; the message lists the
                available columns.
        """
        if column not in df.columns:
            # Column labels are not necessarily strings (e.g. integers).
            available_cols = ", ".join(str(col) for col in df.columns)
            raise DataVizError(f"Column '{column}' not found. Available columns: {available_cols}")

    def _create_modern_layout(self, title: str, subtitle: Optional[str] = None) -> Dict[str, Any]:
        """
        Build the layout dictionary shared by all charts.

        Args:
            title: Chart title.
            subtitle: Optional second line rendered in smaller type under the title.

        Returns:
            Keyword arguments suitable for ``fig.update_layout(**layout)``.
        """
        title_text = title if not subtitle else f"{title}<br><sup>{subtitle}</sup>"
        return {
            'title': {
                'text': title_text,
                'x': 0.5,
                'font': {'size': 24, 'family': 'Arial Black', 'color': self.theme['text']}
            },
            'font': {'family': 'Arial', 'size': 12, 'color': self.theme['text']},
            'plot_bgcolor': 'rgba(0,0,0,0)',
            'paper_bgcolor': self.theme['background'],
            'margin': {'t': 80, 'b': 60, 'l': 60, 'r': 60},
            'showlegend': True,
            'legend': {
                'orientation': 'h',
                'yanchor': 'bottom',
                'y': 1.02,
                'xanchor': 'center',
                'x': 0.5
            }
        }

    @handle_errors
    def create_pie_chart(self, df: pd.DataFrame, column: Optional[str] = None,
                         use_first_row: bool = False, title: Optional[str] = None) -> None:
        """
        Create a donut-style pie chart with an info panel beside it.

        Args:
            df: Input DataFrame.
            column: Column whose value distribution is plotted. If None and
                ``use_first_row`` is False, the chart shows how many columns
                are numeric, text, or of another type.
            use_first_row: If True, the numeric cells of the first row become
                the slices (takes precedence over ``column``).
            title: Custom title for the chart.
        """
        self._validate_dataframe(df)

        if use_first_row:
            # Non-numeric cells cannot size a slice (and would break the
            # total shown in the info panel), so they are dropped.
            first_row = pd.to_numeric(df.iloc[0], errors="coerce").dropna()
            if first_row.empty:
                raise DataVizError("First row contains no numeric values to plot")
            labels = [str(col) for col in first_row.index]
            values = first_row.tolist()
            chart_title = title or "First Row Distribution"
        elif column:
            self._validate_column(df, column)
            value_counts = df[column].value_counts()
            if value_counts.empty:
                raise DataVizError(f"Column '{column}' has no non-null values")
            labels = value_counts.index.tolist()
            values = value_counts.tolist()
            chart_title = title or f"Distribution of {column}"
        else:
            numeric_cols = df.select_dtypes(include=[np.number]).shape[1]
            text_cols = df.select_dtypes(include=[object]).shape[1]
            labels = ['Numeric Columns', 'Text Columns']
            values = [numeric_cols, text_cols]
            # Datetime, boolean, categorical... columns are neither numeric
            # nor object; report them so the slices add up to all columns.
            other_cols = df.shape[1] - numeric_cols - text_cols
            if other_cols > 0:
                labels.append('Other Columns')
                values.append(other_cols)
            chart_title = title or "Column Types Distribution"

        fig = go.Figure(data=[
            go.Pie(
                labels=labels,
                values=values,
                hole=0.4,
                textinfo='label+percent+value',
                textfont={'size': 12, 'color': 'white'},
                marker={
                    'colors': self.colors['primary'],
                    'line': {'color': '#ffffff', 'width': 2}
                },
                hovertemplate='<b>%{label}</b><br>Value: %{value}<br>Percentage: %{percent}<extra></extra>'
            )
        ])
        fig.update_layout(**self._create_modern_layout(chart_title))

        # Chart on the left, short summary on the right.
        col1, col2 = st.columns([3, 1])
        with col1:
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.info(f"📊 **Chart Info**\n\n• Total items: {len(labels)}\n• Data points: {sum(values)}")

    @handle_errors
    def create_metric_card(self, df: pd.DataFrame, column: Optional[str] = None,
                           metric_type: str = 'count') -> None:
        """
        Create a gradient-styled HTML metric card.

        Args:
            df: Input DataFrame.
            column: Column to summarise. If None, the card shows the row
                count with the column count as subtitle.
            metric_type: 'count', 'mean', 'sum' or 'unique'. 'mean' and 'sum'
                apply only to numeric columns; in every other case the card
                falls back to the non-null count.
        """
        # Local import: the card is rendered with unsafe_allow_html=True, so
        # text taken from the data (the column name) must be escaped.
        import html

        self._validate_dataframe(df)

        if column:
            self._validate_column(df, column)
            series = df[column]
            numeric = self._is_numeric(series)
            if metric_type == 'mean' and numeric:
                display_value = f"{float(series.mean()):.2f}"
                subtitle = "Average Value"
            elif metric_type == 'sum' and numeric:
                total = series.sum()
                # Integer sums keep exact digits; float sums are rounded so
                # the card does not show a long tail of decimals.
                if pd.api.types.is_integer_dtype(series):
                    display_value = f"{int(total):,}"
                else:
                    display_value = f"{float(total):,.2f}"
                subtitle = "Total Sum"
            elif metric_type == 'unique':
                display_value = f"{int(series.nunique()):,}"
                subtitle = "Unique Values"
            else:
                display_value = f"{int(series.count()):,}"
                subtitle = "Non-null Count"
            title = str(column).replace('_', ' ').title()
        else:
            display_value = f"{df.shape[0]:,}"
            title = "Total Records"
            subtitle = f"{df.shape[1]} columns"

        safe_title = html.escape(title)
        safe_subtitle = html.escape(subtitle)
        safe_value = html.escape(display_value)

        st.markdown(
            f"""
            <div style='
            background: linear-gradient(135deg, {self.theme['accent']}, {self.theme['success']});
            padding: 30px;
            border-radius: 15px;
            box-shadow: 0 10px 25px rgba(0,0,0,0.1);
            text-align: center;
            color: white;
            position: relative;
            overflow: hidden;
            '>
            <div style='
            position: absolute;
            top: -50%;
            right: -50%;
            width: 100px;
            height: 100px;
            background: rgba(255,255,255,0.1);
            border-radius: 50%;
            '></div>
            <h3 style='margin: 0; font-size: 16px; opacity: 0.9;'>{safe_subtitle}</h3>
            <h1 style='margin: 10px 0; font-size: 2.5em; font-weight: bold;'>{safe_value}</h1>
            <h2 style='margin: 0; font-size: 18px; opacity: 0.9;'>{safe_title}</h2>
            </div>
            """,
            unsafe_allow_html=True
        )

    @handle_errors
    def create_flow_diagram(self, df: pd.DataFrame, source_col: str, target_col: str,
                            title: Optional[str] = None) -> None:
        """
        Create a Sankey flow diagram with summary metrics beside it.

        Rows with a null in either column are ignored. Each distinct
        (source, target) pair becomes one link weighted by its row count.

        Args:
            df: Input DataFrame.
            source_col: Source column name.
            target_col: Target column name.
            title: Custom title.

        Raises:
            DataVizError: (reported by ``handle_errors``) if no rows remain
                after dropping nulls.
        """
        self._validate_dataframe(df)
        self._validate_column(df, source_col)
        self._validate_column(df, target_col)

        df_clean = df[[source_col, target_col]].dropna()
        if df_clean.empty:
            raise DataVizError("No valid data after removing null values")

        # Count rows per (source, target) pair. Working on the grouped Series
        # avoids adding a 'count' column that could clash with a data column.
        pair_counts = df_clean.groupby([source_col, target_col], observed=True).size()

        # First-appearance order keeps node order and colours stable between
        # runs (set iteration order is not).
        all_nodes = list(pd.unique(pd.concat(
            [df_clean[source_col], df_clean[target_col]],
            ignore_index=True,
        )))
        node_dict = {node: idx for idx, node in enumerate(all_nodes)}

        source_indices = [node_dict[src] for src, _ in pair_counts.index]
        target_indices = [node_dict[dst] for _, dst in pair_counts.index]
        values = pair_counts.tolist()

        # Cycle the palette so there is one colour per node even when nodes
        # outnumber palette entries.
        palette = self.colors['primary']
        node_colors = [palette[i % len(palette)] for i in range(len(all_nodes))]

        fig = go.Figure(data=[
            go.Sankey(
                node={
                    'pad': 15,
                    'thickness': 20,
                    'line': {'color': "black", 'width': 0.5},
                    'label': [str(node) for node in all_nodes],
                    'color': node_colors
                },
                link={
                    'source': source_indices,
                    'target': target_indices,
                    'value': values,
                    'color': ["rgba(52, 152, 219, 0.3)"] * len(values)
                }
            )
        ])

        chart_title = title or f"{source_col} → {target_col} Flow"
        layout = self._create_modern_layout(chart_title)
        layout['height'] = 600
        fig.update_layout(**layout)

        # Diagram on the left, headline numbers on the right.
        col1, col2 = st.columns([4, 1])
        with col1:
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.metric("🔄 Total Flows", len(pair_counts))
            st.metric("📍 Unique Sources", int(df_clean[source_col].nunique()))
            st.metric("🎯 Unique Targets", int(df_clean[target_col].nunique()))

    @handle_errors
    def create_grouped_chart(self, df: pd.DataFrame, group_col: str, value_col: str,
                             chart_type: str = 'bar', title: Optional[str] = None) -> None:
        """
        Create a grouped chart of row counts per (group, value) combination.

        Args:
            df: Input DataFrame.
            group_col: Column shown on the x-axis.
            value_col: Column whose distinct values become separate series.
            chart_type: 'bar', 'line' or 'scatter'.
            title: Custom title.

        Raises:
            DataVizError: (reported by ``handle_errors``) for an unsupported
                ``chart_type``.
        """
        self._validate_dataframe(df)
        self._validate_column(df, group_col)
        self._validate_column(df, value_col)

        if chart_type not in ('bar', 'line', 'scatter'):
            raise DataVizError(
                f"Unsupported chart type '{chart_type}'. Use 'bar', 'line' or 'scatter'"
            )

        # Rows: groups, columns: distinct values, cells: occurrence counts.
        grouped_data = df.groupby([group_col, value_col]).size().unstack(fill_value=0)

        fig = go.Figure()
        colors = self.colors['accent']
        for idx, col in enumerate(grouped_data.columns):
            series_color = colors[idx % len(colors)]
            if chart_type == 'bar':
                fig.add_trace(go.Bar(
                    name=str(col),
                    x=grouped_data.index,
                    y=grouped_data[col],
                    marker_color=series_color
                ))
            elif chart_type == 'line':
                fig.add_trace(go.Scatter(
                    name=str(col),
                    x=grouped_data.index,
                    y=grouped_data[col],
                    mode='lines+markers',
                    line_color=series_color
                ))
            else:  # scatter
                fig.add_trace(go.Scatter(
                    name=str(col),
                    x=grouped_data.index,
                    y=grouped_data[col],
                    mode='markers',
                    marker_color=series_color
                ))

        chart_title = title or f"{group_col} by {value_col}"
        layout = self._create_modern_layout(chart_title)
        if chart_type == 'bar':
            # barmode only applies to bar traces; leave it unset otherwise.
            layout['barmode'] = 'group'
        layout['xaxis'] = {'title': group_col}
        layout['yaxis'] = {'title': 'Count'}
        fig.update_layout(**layout)
        st.plotly_chart(fig, use_container_width=True)

    @handle_errors
    def create_distribution_chart(self, df: pd.DataFrame, column: str,
                                  chart_type: str = 'auto', title: Optional[str] = None) -> None:
        """
        Create a distribution chart with summary metrics beside it.

        Args:
            df: Input DataFrame.
            column: Column name.
            chart_type: 'auto', 'histogram', 'bar' or 'box'. 'auto' picks a
                histogram for numeric columns and a bar chart otherwise; any
                other unrecognised value is treated as 'bar'.
            title: Custom title.
        """
        self._validate_dataframe(df)
        self._validate_column(df, column)

        series = df[column]
        numeric = self._is_numeric(series)

        if chart_type == 'auto':
            chart_type = 'histogram' if numeric else 'bar'

        if chart_type == 'histogram':
            fig = px.histogram(
                df, x=column,
                # At most 30 bins, never fewer than 1 (all-null column).
                nbins=max(1, min(30, int(series.nunique()))),
                color_discrete_sequence=[self.theme['accent']]
            )
        elif chart_type == 'box':
            fig = px.box(df, y=column, color_discrete_sequence=[self.theme['accent']])
        else:  # bar chart
            value_counts = series.value_counts().head(20)  # Limit to top 20
            fig = px.bar(
                x=value_counts.index,
                y=value_counts.values,
                color_discrete_sequence=[self.theme['accent']]
            )
            # px.bar was given raw arrays, so axis titles are set by hand.
            fig.update_layout(xaxis_title=column, yaxis_title="Count")

        chart_title = title or f"Distribution of {column}"
        fig.update_layout(**self._create_modern_layout(chart_title))

        # Chart on the left, headline statistics on the right.
        col1, col2 = st.columns([3, 1])
        with col1:
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            if numeric:
                st.metric("📊 Mean", f"{float(series.mean()):.2f}")
                st.metric("📈 Std Dev", f"{float(series.std()):.2f}")
                st.metric("🎯 Unique", int(series.nunique()))
            else:
                st.metric("🔤 Unique Values", int(series.nunique()))
                modes = series.mode()
                # Convert to text: the most common value may be of any type.
                most_common = str(modes.iloc[0]) if not modes.empty else "N/A"
                st.metric("📝 Most Common", most_common)
# Convenience functions for easy usage
def quick_pie(df: pd.DataFrame, column: Optional[str] = None, **kwargs):
    """Render a pie chart through a throwaway ``ModernDataVisualizer``; extra keyword arguments are forwarded to ``create_pie_chart``."""
    ModernDataVisualizer().create_pie_chart(df, column, **kwargs)
def quick_metric(df: pd.DataFrame, column: Optional[str] = None, **kwargs):
    """Render a metric card through a throwaway ``ModernDataVisualizer``; extra keyword arguments are forwarded to ``create_metric_card``."""
    ModernDataVisualizer().create_metric_card(df, column, **kwargs)
def quick_flow(df: pd.DataFrame, source_col: str, target_col: str, **kwargs):
    """Render a flow diagram through a throwaway ``ModernDataVisualizer``; extra keyword arguments are forwarded to ``create_flow_diagram``."""
    ModernDataVisualizer().create_flow_diagram(df, source_col, target_col, **kwargs)
def quick_distribution(df: pd.DataFrame, column: str, **kwargs):
    """Render a distribution chart through a throwaway ``ModernDataVisualizer``; extra keyword arguments are forwarded to ``create_distribution_chart``."""
    ModernDataVisualizer().create_distribution_chart(df, column, **kwargs)
# Example usage
# Demo page: runs only when this file is the entry script, not when imported.
if __name__ == "__main__":
    st.title("🎨 Modern Data Visualizer Demo")

    # Sample data: 100 rows with a repeating category, random values and a
    # random status.
    sample_data = pd.DataFrame({
        'category': ['A', 'B', 'C', 'A', 'B'] * 20,
        'values': np.random.randn(100),
        'status': np.random.choice(['Active', 'Inactive', 'Pending'], 100)
    })

    viz = ModernDataVisualizer()
    st.subheader("📊 Sample Visualizations")

    # Metric card and pie chart side by side.
    col1, col2 = st.columns(2)
    with col1:
        viz.create_metric_card(sample_data, 'values', 'mean')
    with col2:
        viz.create_pie_chart(sample_data, 'category')

    # Full-width charts below.
    viz.create_distribution_chart(sample_data, 'values')
    viz.create_flow_diagram(sample_data, 'category', 'status')
# Top comments (0)