DEV Community

Armaan Khan
Armaan Khan

Posted on

new extra

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from uuid import uuid4

def pie_chart_from_row(df, column_name=None):
    """
    Render a donut-style pie chart from the first row of ``df``.

    Each column of the first row becomes one slice: the column name is the
    label and the cell value is the slice size. Cells that cannot be read as
    numbers (text, missing values) are skipped so they do not corrupt the
    chart.

    Args:
        df: DataFrame whose first row is plotted.
        column_name: Optional name of an existing column; when given and
            present in ``df`` it is used as the chart title only.

    Returns:
        None. The chart (or an error/warning message) is written to the
        Streamlit page.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return

        if column_name and column_name not in df.columns:
            st.warning(f"Column {column_name} not found. Using default title.")
            column_name = None

        # A row of a mixed-type frame is an object Series; keep only the
        # cells that are usable as slice sizes.
        first_row = pd.to_numeric(df.iloc[0], errors="coerce").dropna()
        if first_row.empty:
            st.warning("The first row has no numeric values to plot.")
            return

        labels = [str(col) for col in first_row.index]
        values = first_row.tolist()

        fig = go.Figure(data=[
            go.Pie(
                labels=labels,
                values=values,
                hole=0.3,
                textinfo='label+percent',
                marker=dict(colors=px.colors.qualitative.Pastel),
                textfont=dict(size=14),
            )
        ])

        fig.update_layout(
            title_text=column_name if column_name else "First Row Distribution",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            showlegend=True,
            template="plotly",
            margin=dict(t=60, b=60, l=60, r=60),
        )

        st.plotly_chart(fig, use_container_width=True)

    except Exception as e:
        # Top-level UI boundary: report the failure on the page instead of
        # crashing the Streamlit script.
        st.error(f"Error generating pie chart: {str(e)}")

def pie_chart_from_column(df, column_name):
    """
    Draw a pie chart of how often each distinct value occurs in a column.

    Slice sizes are the occurrence counts returned by ``value_counts()`` for
    ``df[column_name]``. Problems with the inputs are reported on the
    Streamlit page rather than raised.
    """
    try:
        frame_is_usable = isinstance(df, pd.DataFrame) and not df.empty
        if not frame_is_usable:
            st.error("Invalid or empty DataFrame provided.")
            return

        if column_name not in df.columns:
            st.error(f"Column {column_name} not found in DataFrame.")
            return

        # One slice per distinct value, sized by its frequency.
        frequencies = df[column_name].value_counts()

        slice_trace = go.Pie(
            labels=frequencies.index,
            values=frequencies.values,
            textinfo='label+percent',
            marker={"colors": px.colors.qualitative.Set2},
            textfont={"size": 14},
        )
        fig = go.Figure(data=[slice_trace])

        layout_options = {
            "title_text": f"Distribution of {column_name}",
            "title_x": 0.5,
            "title_font": {"size": 20, "family": "Arial"},
            "showlegend": True,
            "template": "plotly",
            "margin": {"t": 60, "b": 60, "l": 60, "r": 60},
        }
        fig.update_layout(**layout_options)

        st.plotly_chart(fig, use_container_width=True)

    except Exception as exc:
        st.error(f"Error generating column pie chart: {exc!s}")

def metric_card(df, column_name=None):
    """
    Render an HTML metric card summarising a column (or the whole frame).

    Behaviour:
        * ``column_name`` given and present, numeric column: shows the mean,
          formatted to two decimals.
        * ``column_name`` given and present, any other dtype (including
          booleans): shows the non-null count.
        * otherwise: shows the number of rows under the title
          "Total Records".

    Args:
        df: Source DataFrame.
        column_name: Optional column to summarise; also used as the card
            title.

    Returns:
        None. The card (or an error message) is written to the Streamlit
        page.
    """
    # Local import: the title is interpolated into raw HTML below and must be
    # escaped; `html` is not among the file's top-level imports.
    import html

    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return

        if column_name and column_name in df.columns:
            title = str(column_name)
            series = df[column_name]
            # Recognise every numeric dtype (int32, float32, nullable Int64,
            # ...), not just int64/float64. Booleans keep the count view.
            is_numeric = (
                pd.api.types.is_numeric_dtype(series)
                and not pd.api.types.is_bool_dtype(series)
            )
            if is_numeric:
                display_value = f"{series.mean():.2f}"
            else:
                display_value = f"{series.count()}"
        else:
            title = "Total Records"
            display_value = f"{df.shape[0]}"

        # Column names come from user data; escape them before embedding in
        # markup rendered with unsafe_allow_html.
        safe_title = html.escape(title)

        st.markdown(
            f"""
            <div style='background-color: #ffffff; padding: 25px; border-radius: 12px; box-shadow: 0 6px 12px rgba(0,0,0,0.15); border: 1px solid #e0e0e0;'>
                <h3 style='text-align: center; color: #1a3c5e; margin: 0; font-family: Arial, sans-serif;'>{safe_title}</h3>
                <h2 style='text-align: center; color: #2980b9; margin: 10px 0 0 0; font-family: Arial, sans-serif;'>{display_value}</h2>
            </div>
            """,
            unsafe_allow_html=True
        )

    except Exception as e:
        # Top-level UI boundary: surface the failure on the page.
        st.error(f"Error generating metric card: {str(e)}")

def problem_solution_visual(df, problem_col, solution_col):
    """
    Render a Sankey diagram of how values in one column flow to another.

    Rows with a missing value in either column are ignored. Identical
    (problem, solution) pairs are aggregated into a single link whose width
    is the number of occurrences.

    Args:
        df: Source DataFrame.
        problem_col: Column providing the link sources.
        solution_col: Column providing the link targets.

    Returns:
        None. The diagram (or an error/warning message) is written to the
        Streamlit page.

    Note:
        A value that appears in both columns is drawn as a single node, as
        in the original implementation.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return
        if problem_col not in df.columns or solution_col not in df.columns:
            st.error(f"One or both columns ({problem_col}, {solution_col}) not found.")
            return

        df_sankey = df[[problem_col, solution_col]].dropna()
        if df_sankey.empty:
            st.warning("No rows with both values present; nothing to display.")
            return

        # Aggregate duplicate pairs into one weighted link instead of
        # emitting one unit-width link per row. sort=False avoids comparing
        # values of mixed types.
        flows = (
            df_sankey.groupby([problem_col, solution_col], sort=False)
            .size()
            .reset_index(name="count")
        )
        # Categorical columns can yield zero-count combinations; drop them.
        flows = flows[flows["count"] > 0]

        # First-appearance order keeps node placement stable between reruns
        # (set iteration order is not).
        node_indices = {}
        for node in list(flows[problem_col]) + list(flows[solution_col]):
            node_indices.setdefault(node, len(node_indices))
        nodes = list(node_indices)

        source = [node_indices[p] for p in flows[problem_col]]
        target = [node_indices[s] for s in flows[solution_col]]
        value = flows["count"].tolist()

        # Cycle the palette so every node gets a colour even when there are
        # more nodes than palette entries.
        palette = px.colors.qualitative.D3
        node_colors = [palette[i % len(palette)] for i in range(len(nodes))]
        # Links take a translucent version of their source node's colour.
        link_colors = [
            f"rgba({','.join(map(str, px.colors.hex_to_rgb(node_colors[s])))},0.3)"
            for s in source
        ]

        fig = go.Figure(data=[
            go.Sankey(
                node=dict(
                    pad=20,
                    thickness=30,
                    line=dict(color="black", width=0.5),
                    label=[str(node) for node in nodes],
                    color=node_colors,
                    hovertemplate='%{label}<br>Count: %{value}<extra></extra>'
                ),
                link=dict(
                    source=source,
                    target=target,
                    value=value,
                    color=link_colors,
                    hovertemplate='From: %{source.label}<br>To: %{target.label}<br>Count: %{value}<extra></extra>'
                )
            )
        ])

        fig.update_layout(
            title_text=f"{problem_col} to {solution_col} Flow",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            font=dict(size=12, family="Arial", color="#2c3e50"),
            template="plotly",
            margin=dict(t=80, b=60, l=60, r=60),
            height=600,
            annotations=[
                dict(
                    x=0, y=1.05, xref="paper", yref="paper",
                    text="Problem to Solution Flow Analysis",
                    showarrow=False,
                    font=dict(size=14, color="#1a3c5e")
                )
            ]
        )

        st.plotly_chart(fig, use_container_width=True)

    except Exception as e:
        # Top-level UI boundary: surface the failure on the page.
        st.error(f"Error generating Sankey diagram: {str(e)}")

def status_by_user_visual(df, user_col, status_col):
    """
    Render a grouped bar chart of row counts per user, split by status.

    The frame is grouped by ``(user_col, status_col)``; each distinct status
    becomes one bar series with its own colour, and each user one group on
    the x axis.

    Args:
        df: Source DataFrame.
        user_col: Column whose values form the x-axis groups.
        status_col: Column whose distinct values form the bar series.

    Returns:
        None. The chart (or an error message) is written to the Streamlit
        page.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return
        if user_col not in df.columns or status_col not in df.columns:
            st.error(f"One or both columns ({user_col}, {status_col}) not found.")
            return

        # Rows: users, columns: statuses, cells: occurrence counts.
        df_grouped = df.groupby([user_col, status_col]).size().unstack(fill_value=0)

        palette = px.colors.qualitative.Set1
        fig = go.Figure()
        # enumerate gives the series position directly instead of searching
        # the column list again for every trace.
        for position, status in enumerate(df_grouped.columns):
            fig.add_trace(go.Bar(
                x=df_grouped.index,
                y=df_grouped[status],
                name=str(status),  # legend names must be text
                marker_color=palette[position % len(palette)]
            ))

        fig.update_layout(
            title_text=f"{user_col} by {status_col}",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            xaxis_title=user_col,
            yaxis_title="Count",
            barmode='group',
            template="plotly",
            margin=dict(t=60, b=60, l=60, r=60),
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
            font=dict(family="Arial", size=12, color="#2c3e50")
        )

        st.plotly_chart(fig, use_container_width=True)

    except Exception as e:
        # Top-level UI boundary: surface the failure on the page.
        st.error(f"Error generating status bar chart: {str(e)}")

def single_column_visual(df, column_name):
    """
    Render the distribution of a single column.

    Numeric columns are shown as a 30-bin histogram; every other dtype
    (including booleans) is shown as a bar chart of value counts.

    Args:
        df: Source DataFrame.
        column_name: Column to visualise.

    Returns:
        None. The chart (or an error message) is written to the Streamlit
        page.
    """
    try:
        if not isinstance(df, pd.DataFrame) or df.empty:
            st.error("Invalid or empty DataFrame provided.")
            return
        if column_name not in df.columns:
            st.error(f"Column {column_name} not found in DataFrame.")
            return

        series = df[column_name]
        # Recognise every numeric dtype (int32, float32, nullable Int64,
        # ...), not just int64/float64. Booleans stay on the bar-chart path.
        is_numeric = (
            pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series)
        )

        if is_numeric:
            fig = px.histogram(df, x=column_name, nbins=30,
                               color_discrete_sequence=px.colors.qualitative.Bold)
        else:
            value_counts = series.value_counts()
            fig = px.bar(x=value_counts.index, y=value_counts.values,
                         color_discrete_sequence=px.colors.qualitative.Bold)
            # px.bar was given raw arrays, so the axes need explicit titles.
            fig.update_layout(xaxis_title=column_name, yaxis_title="Count")

        fig.update_layout(
            title_text=f"Distribution of {column_name}",
            title_x=0.5,
            title_font=dict(size=20, family="Arial"),
            template="plotly",
            margin=dict(t=60, b=60, l=60, r=60),
            font=dict(family="Arial", size=12, color="#2c3e50")
        )

        st.plotly_chart(fig, use_container_width=True)

    except Exception as e:
        # Top-level UI boundary: surface the failure on the page.
        st.error(f"Error generating single column visual: {str(e)}")

Enter fullscreen mode Exit fullscreen mode

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import Optional, Union, List, Dict, Any
import logging
from functools import wraps
import numpy as np

# Configure logging

logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module (the double underscores of
# __name__ were lost when the listing was rendered as markdown).
logger = logging.getLogger(__name__)

class DataVizError(Exception):
    """Raised when a visualization cannot be built from the given input."""

def handle_errors(func):
    """
    Decorator that reports failures of ``func`` instead of raising them.

    Any exception raised by the wrapped callable is shown on the Streamlit
    page with ``st.error`` and written to the module logger. The wrapper then
    returns ``None``. On success the wrapped callable's return value is
    passed through unchanged.

    Args:
        func: Callable to protect.

    Returns:
        The wrapping callable, carrying ``func``'s name and docstring.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except DataVizError as e:
            st.error(f"❌ Visualization Error: {str(e)}")
            logger.error(f"DataVizError in {func.__name__}: {str(e)}")
        except KeyError as e:
            st.error(f"❌ Column Error: {str(e)} not found in data")
            logger.error(f"KeyError in {func.__name__}: {str(e)}")
        except ValueError as e:
            st.error(f"❌ Data Error: Invalid data format - {str(e)}")
            logger.error(f"ValueError in {func.__name__}: {str(e)}")
        except Exception as e:
            # Last-resort boundary so one broken chart does not abort the
            # whole Streamlit script.
            st.error(f"❌ Unexpected Error: {str(e)}")
            logger.error(f"Unexpected error in {func.__name__}: {str(e)}")
        return None
    return wrapper

class ModernDataVisualizer:
    """
    Streamlit/Plotly chart helpers that share one colour scheme and layout.

    Every public ``create_*`` method validates its inputs, builds a Plotly
    figure (or an HTML card) and writes it to the current Streamlit page.
    The methods are wrapped with ``handle_errors``, so failures are shown on
    the page and logged rather than raised, and each method returns ``None``.
    """

    def __init__(self):
        # Plotly qualitative palettes used for multi-series colouring.
        self.colors = {
            'primary': px.colors.qualitative.Set3,
            'secondary': px.colors.qualitative.Pastel,
            'accent': px.colors.qualitative.Bold
        }
        # Single colours used for text, backgrounds and one-series charts.
        self.theme = {
            'background': '#ffffff',
            'text': '#2c3e50',
            'accent': '#3498db',
            'success': '#27ae60',
            'warning': '#f39c12',
            'error': '#e74c3c'
        }

    @staticmethod
    def _is_numeric(series: pd.Series) -> bool:
        """Return True for any numeric dtype except boolean."""
        return (
            pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series)
        )

    @staticmethod
    def _cycle_colors(palette: List[str], count: int) -> List[str]:
        """Return ``count`` colours, repeating ``palette`` as often as needed."""
        return [palette[i % len(palette)] for i in range(count)]

    def _validate_dataframe(self, df: pd.DataFrame) -> None:
        """
        Ensure ``df`` is a non-empty pandas DataFrame.

        Raises:
            DataVizError: If ``df`` is not a DataFrame or has no rows or no
                columns.
        """
        if not isinstance(df, pd.DataFrame):
            raise DataVizError("Input must be a pandas DataFrame")
        # DataFrame.empty already covers the zero-row case.
        if df.empty:
            raise DataVizError("DataFrame is empty")

    def _validate_column(self, df: pd.DataFrame, column: str) -> None:
        """
        Ensure ``column`` exists in ``df``.

        Raises:
            DataVizError: If the column is missing; the message lists the
                available columns.
        """
        if column not in df.columns:
            # Column labels are not guaranteed to be strings.
            available_cols = ", ".join(str(col) for col in df.columns)
            raise DataVizError(f"Column '{column}' not found. Available columns: {available_cols}")

    def _create_modern_layout(self, title: str, subtitle: Optional[str] = None) -> Dict[str, Any]:
        """
        Build the shared Plotly layout dictionary.

        Args:
            title: Chart title, centred above the plot.
            subtitle: Optional second line shown in smaller type under the
                title.

        Returns:
            A dict suitable for ``fig.update_layout(**layout)``.
        """
        title_text = title
        if subtitle:
            # Plotly text accepts a small HTML subset, including <br>/<sup>.
            title_text = f"{title}<br><sup>{subtitle}</sup>"

        return {
            'title': {
                'text': title_text,
                'x': 0.5,
                'font': {'size': 24, 'family': 'Arial Black', 'color': self.theme['text']}
            },
            'font': {'family': 'Arial', 'size': 12, 'color': self.theme['text']},
            'plot_bgcolor': 'rgba(0,0,0,0)',
            'paper_bgcolor': self.theme['background'],
            'margin': {'t': 80, 'b': 60, 'l': 60, 'r': 60},
            'showlegend': True,
            'legend': {
                'orientation': 'h',
                'yanchor': 'bottom',
                'y': 1.02,
                'xanchor': 'center',
                'x': 0.5
            }
        }

    @handle_errors
    def create_pie_chart(self, df: pd.DataFrame, column: Optional[str] = None,
                         use_first_row: bool = False, title: Optional[str] = None) -> None:
        """
        Create a donut-style pie chart with an info panel beside it.

        Args:
            df: Input DataFrame
            column: Column whose value counts are plotted. If omitted and
                use_first_row is False, the chart shows how many numeric
                versus text columns the frame has.
            use_first_row: If True, plot the numeric cells of the first row,
                one slice per column. Takes precedence over ``column``.
            title: Custom title for the chart

        Raises:
            DataVizError: For invalid input. This is caught by the
                ``handle_errors`` wrapper and shown on the page.
        """
        self._validate_dataframe(df)

        if use_first_row:
            # Text and missing cells cannot be slice sizes; skip them.
            first_row = pd.to_numeric(df.iloc[0], errors="coerce").dropna()
            if first_row.empty:
                raise DataVizError("First row has no numeric values to plot")
            labels = [str(col) for col in first_row.index]
            values = first_row.tolist()
            chart_title = title or "First Row Distribution"
        elif column:
            self._validate_column(df, column)
            value_counts = df[column].value_counts()
            labels = value_counts.index.tolist()
            values = value_counts.tolist()
            chart_title = title or f"Distribution of {column}"
        else:
            numeric_cols = df.select_dtypes(include=[np.number]).shape[1]
            text_cols = df.select_dtypes(include=[object]).shape[1]
            labels = ['Numeric Columns', 'Text Columns']
            values = [numeric_cols, text_cols]
            chart_title = title or "Column Types Distribution"

        fig = go.Figure(data=[
            go.Pie(
                labels=labels,
                values=values,
                hole=0.4,
                textinfo='label+percent+value',
                textfont={'size': 12, 'color': 'white'},
                marker={
                    'colors': self.colors['primary'],
                    'line': {'color': '#ffffff', 'width': 2}
                },
                hovertemplate='<b>%{label}</b><br>Value: %{value}<br>Percentage: %{percent}<extra></extra>'
            )
        ])

        fig.update_layout(**self._create_modern_layout(chart_title))

        # Chart on the left, summary on the right.
        col1, col2 = st.columns([3, 1])
        with col1:
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.info(f"📊 **Chart Info**\n\n• Total items: {len(labels)}\n• Data points: {sum(values)}")

    @handle_errors
    def create_metric_card(self, df: pd.DataFrame, column: Optional[str] = None,
                           metric_type: str = 'count') -> None:
        """
        Create a gradient metric card rendered as HTML.

        Args:
            df: Input DataFrame
            column: Column name for metrics. If omitted, the card shows the
                total row count.
            metric_type: Type of metric ('count', 'mean', 'sum', 'unique').
                'mean' and 'sum' apply to numeric columns only; for other
                columns, and for unrecognised values, the non-null count is
                shown instead.
        """
        # Local import: the title is interpolated into raw HTML below and
        # must be escaped; `html` is not among the file's top-level imports.
        import html

        self._validate_dataframe(df)

        if column:
            self._validate_column(df, column)
            series = df[column]
            numeric = self._is_numeric(series)

            if metric_type == 'mean' and numeric:
                display_value = f"{series.mean():.2f}"
                subtitle = "Average Value"
            elif metric_type == 'sum' and numeric:
                value = series.sum()
                # Float sums would otherwise print every decimal digit.
                if isinstance(value, (float, np.floating)):
                    display_value = f"{value:,.2f}"
                else:
                    display_value = f"{value:,}"
                subtitle = "Total Sum"
            elif metric_type == 'unique':
                display_value = f"{series.nunique():,}"
                subtitle = "Unique Values"
            else:
                display_value = f"{series.count():,}"
                subtitle = "Non-null Count"

            title = str(column).replace('_', ' ').title()
        else:
            display_value = f"{df.shape[0]:,}"
            title = "Total Records"
            subtitle = f"{df.shape[1]} columns"

        # Column names come from user data; escape before embedding in
        # markup rendered with unsafe_allow_html.
        safe_title = html.escape(title)

        st.markdown(
            f"""
            <div style='
                background: linear-gradient(135deg, {self.theme['accent']}, {self.theme['success']});
                padding: 30px;
                border-radius: 15px;
                box-shadow: 0 10px 25px rgba(0,0,0,0.1);
                text-align: center;
                color: white;
                position: relative;
                overflow: hidden;
            '>
                <div style='
                    position: absolute;
                    top: -50%;
                    right: -50%;
                    width: 100px;
                    height: 100px;
                    background: rgba(255,255,255,0.1);
                    border-radius: 50%;
                '></div>
                <h3 style='margin: 0; font-size: 16px; opacity: 0.9;'>{subtitle}</h3>
                <h1 style='margin: 10px 0; font-size: 2.5em; font-weight: bold;'>{display_value}</h1>
                <h2 style='margin: 0; font-size: 18px; opacity: 0.9;'>{safe_title}</h2>
            </div>
            """,
            unsafe_allow_html=True
        )

    @handle_errors
    def create_flow_diagram(self, df: pd.DataFrame, source_col: str, target_col: str,
                            title: Optional[str] = None) -> None:
        """
        Create a Sankey flow diagram with summary metrics beside it.

        Rows with a missing value in either column are ignored; identical
        (source, target) pairs are merged into one link weighted by count.

        Args:
            df: Input DataFrame
            source_col: Source column name
            target_col: Target column name
            title: Custom title

        Raises:
            DataVizError: For invalid input or when no complete rows remain.
                This is caught by the ``handle_errors`` wrapper.
        """
        self._validate_dataframe(df)
        self._validate_column(df, source_col)
        self._validate_column(df, target_col)

        df_clean = df[[source_col, target_col]].dropna()
        if df_clean.empty:
            raise DataVizError("No valid data after removing null values")

        # sort=False avoids comparing values of mixed types.
        flow_counts = (
            df_clean.groupby([source_col, target_col], sort=False)
            .size()
            .reset_index(name='count')
        )
        # Categorical columns can yield zero-count combinations; drop them.
        flow_counts = flow_counts[flow_counts['count'] > 0]

        # First-appearance order keeps node placement stable between reruns
        # (set iteration order is not).
        node_dict: Dict[Any, int] = {}
        for node in list(flow_counts[source_col]) + list(flow_counts[target_col]):
            node_dict.setdefault(node, len(node_dict))
        all_nodes = list(node_dict)

        source_indices = [node_dict[node] for node in flow_counts[source_col]]
        target_indices = [node_dict[node] for node in flow_counts[target_col]]
        values = flow_counts['count'].tolist()

        fig = go.Figure(data=[
            go.Sankey(
                node={
                    'pad': 15,
                    'thickness': 20,
                    'line': {'color': "black", 'width': 0.5},
                    'label': [str(node) for node in all_nodes],
                    # Repeat the palette so every node has a colour.
                    'color': self._cycle_colors(self.colors['primary'], len(all_nodes))
                },
                link={
                    'source': source_indices,
                    'target': target_indices,
                    'value': values,
                    'color': ["rgba(52, 152, 219, 0.3)"] * len(values)
                }
            )
        ])

        chart_title = title or f"{source_col} → {target_col} Flow"
        layout = self._create_modern_layout(chart_title)
        layout['height'] = 600
        fig.update_layout(**layout)

        col1, col2 = st.columns([4, 1])
        with col1:
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.metric("🔄 Total Flows", len(flow_counts))
            st.metric("📍 Unique Sources", df_clean[source_col].nunique())
            st.metric("🎯 Unique Targets", df_clean[target_col].nunique())

    @handle_errors
    def create_grouped_chart(self, df: pd.DataFrame, group_col: str, value_col: str,
                             chart_type: str = 'bar', title: Optional[str] = None) -> None:
        """
        Create a grouped count chart: one series per distinct ``value_col``
        value, one x position per distinct ``group_col`` value.

        Args:
            df: Input DataFrame
            group_col: Column to group by
            value_col: Column whose distinct values become the series
            chart_type: Type of chart ('bar', 'line', 'scatter')
            title: Custom title

        Raises:
            DataVizError: For invalid input or an unsupported chart_type.
                This is caught by the ``handle_errors`` wrapper.
        """
        self._validate_dataframe(df)
        self._validate_column(df, group_col)
        self._validate_column(df, value_col)

        if chart_type not in ('bar', 'line', 'scatter'):
            raise DataVizError(
                f"Unsupported chart_type '{chart_type}'. Use 'bar', 'line' or 'scatter'."
            )

        # Rows: groups, columns: distinct values, cells: occurrence counts.
        grouped_data = df.groupby([group_col, value_col]).size().unstack(fill_value=0)

        fig = go.Figure()
        colors = self.colors['accent']
        for idx, col in enumerate(grouped_data.columns):
            color = colors[idx % len(colors)]
            if chart_type == 'bar':
                trace = go.Bar(
                    name=str(col),
                    x=grouped_data.index,
                    y=grouped_data[col],
                    marker_color=color
                )
            elif chart_type == 'line':
                trace = go.Scatter(
                    name=str(col),
                    x=grouped_data.index,
                    y=grouped_data[col],
                    mode='lines+markers',
                    line_color=color
                )
            else:  # scatter
                trace = go.Scatter(
                    name=str(col),
                    x=grouped_data.index,
                    y=grouped_data[col],
                    mode='markers',
                    marker_color=color
                )
            fig.add_trace(trace)

        chart_title = title or f"{group_col} by {value_col}"
        layout = self._create_modern_layout(chart_title)
        # barmode is only meaningful for bar traces.
        if chart_type == 'bar':
            layout['barmode'] = 'group'
        layout['xaxis'] = {'title': group_col}
        layout['yaxis'] = {'title': 'Count'}

        fig.update_layout(**layout)
        st.plotly_chart(fig, use_container_width=True)

    @handle_errors
    def create_distribution_chart(self, df: pd.DataFrame, column: str,
                                  chart_type: str = 'auto', title: Optional[str] = None) -> None:
        """
        Create a distribution chart with summary metrics beside it.

        Args:
            df: Input DataFrame
            column: Column name
            chart_type: Chart type ('auto', 'histogram', 'bar', 'box').
                'auto' picks a histogram for numeric columns and a bar chart
                otherwise. Any other unrecognised value is treated as 'bar'.
            title: Custom title
        """
        self._validate_dataframe(df)
        self._validate_column(df, column)

        series = df[column]
        numeric = self._is_numeric(series)

        if chart_type == 'auto':
            chart_type = 'histogram' if numeric else 'bar'

        if chart_type == 'histogram':
            fig = px.histogram(
                df, x=column,
                # At most 30 bins, never fewer than one (all-null columns
                # have zero distinct values).
                nbins=max(1, min(30, series.nunique())),
                color_discrete_sequence=[self.theme['accent']]
            )
        elif chart_type == 'box':
            fig = px.box(df, y=column, color_discrete_sequence=[self.theme['accent']])
        else:  # bar chart
            value_counts = series.value_counts().head(20)  # Limit to top 20
            fig = px.bar(
                x=value_counts.index,
                y=value_counts.values,
                color_discrete_sequence=[self.theme['accent']]
            )
            # px.bar was given raw arrays, so the axes need explicit titles.
            fig.update_layout(xaxis_title=column, yaxis_title="Count")

        chart_title = title or f"Distribution of {column}"
        fig.update_layout(**self._create_modern_layout(chart_title))

        col1, col2 = st.columns([3, 1])
        with col1:
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            if numeric:
                st.metric("📊 Mean", f"{series.mean():.2f}")
                st.metric("📈 Std Dev", f"{series.std():.2f}")
                st.metric("🎯 Unique", series.nunique())
            else:
                modes = series.mode()
                st.metric("🔤 Unique Values", series.nunique())
                # The mode can be any Python object; show it as text.
                st.metric("📝 Most Common", str(modes.iloc[0]) if not modes.empty else "N/A")
Enter fullscreen mode Exit fullscreen mode

# Convenience functions for easy usage

def quick_pie(df: pd.DataFrame, column: Optional[str] = None, **kwargs):
    """
    Draw a pie chart with a throwaway ``ModernDataVisualizer``.

    Args:
        df: Input DataFrame.
        column: Optional column whose value counts are plotted.
        **kwargs: Forwarded to ``ModernDataVisualizer.create_pie_chart``.
    """
    viz = ModernDataVisualizer()
    viz.create_pie_chart(df, column, **kwargs)

def quick_metric(df: pd.DataFrame, column: Optional[str] = None, **kwargs):
    """
    Draw a metric card with a throwaway ``ModernDataVisualizer``.

    Args:
        df: Input DataFrame.
        column: Optional column to summarise.
        **kwargs: Forwarded to ``ModernDataVisualizer.create_metric_card``.
    """
    viz = ModernDataVisualizer()
    viz.create_metric_card(df, column, **kwargs)

def quick_flow(df: pd.DataFrame, source_col: str, target_col: str, **kwargs):
    """
    Draw a Sankey flow diagram with a throwaway ``ModernDataVisualizer``.

    Args:
        df: Input DataFrame.
        source_col: Column providing the link sources.
        target_col: Column providing the link targets.
        **kwargs: Forwarded to ``ModernDataVisualizer.create_flow_diagram``.
    """
    viz = ModernDataVisualizer()
    viz.create_flow_diagram(df, source_col, target_col, **kwargs)

def quick_distribution(df: pd.DataFrame, column: str, **kwargs):
    """
    Draw a distribution chart with a throwaway ``ModernDataVisualizer``.

    Args:
        df: Input DataFrame.
        column: Column to visualise.
        **kwargs: Forwarded to
            ``ModernDataVisualizer.create_distribution_chart``.
    """
    viz = ModernDataVisualizer()
    viz.create_distribution_chart(df, column, **kwargs)

# Example usage

# Demo page, shown only when this file is executed as a script. The double
# underscores of __name__ / "__main__" were lost when the listing was
# rendered as markdown.
if __name__ == "__main__":
    st.title("🎨 Modern Data Visualizer Demo")

    # Sample data: 100 rows with a repeating category, random values and a
    # random status.
    sample_data = pd.DataFrame({
        'category': ['A', 'B', 'C', 'A', 'B'] * 20,
        'values': np.random.randn(100),
        'status': np.random.choice(['Active', 'Inactive', 'Pending'], 100)
    })

    viz = ModernDataVisualizer()

    st.subheader("📊 Sample Visualizations")

    col1, col2 = st.columns(2)
    with col1:
        viz.create_metric_card(sample_data, 'values', 'mean')
    with col2:
        viz.create_pie_chart(sample_data, 'category')

    viz.create_distribution_chart(sample_data, 'values')
    viz.create_flow_diagram(sample_data, 'category', 'status')
Enter fullscreen mode Exit fullscreen mode
Enter fullscreen mode Exit fullscreen mode

Top comments (0)