YASHWANTH CHIKKI HD

Posted on Jan 28 • Edited on Jan 30

hi

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

-----------------------------

Load and clean data

-----------------------------

df = pd.read_excel("data.xlsx")

actual_column_map = {
"Advisor IP Name": "Advisor_not_used",
"Advisor_IP_ID":"Advisor_IP_ID_not_used",
"clients with individual relationship": "Clients_individual",
"a part of a team": "Clients_team_not_used",
"Total AUM": "Total_AUM",
"MD count": "MD_count",
"Client Count": "Client_count",
"Total RK Assets":"Total_RK_Assets",
"Total FMF Assets":"Total_FMF_Assets" ,

"Total external assets (Nexus)": "External_assets",
"Advisor Tenure" :"Advisor_Tenure",

"Recency - Last client onboarded (in days)": "Recency_days",
"Frequency_2024_N": "Frequency_2024",
"Frequency_2025_N": "Frequency_2025",
"Products per client (only Core clients)": "Products_per_client",
"Attrition_rate_N":"Attrition_rate_N"
}
data = df.rename(columns=actual_column_map)

-----------------------------

Derived columns

-----------------------------

data["Total_assets_not_used"] = data["Total_AUM"] + data["External_assets"]

data["Pct_external"] = np.where(
data["Total_assets_not_used"] == 0,
0,
data["External_assets"] / data["Total_assets_not_used"]
)

data["Ind_ratio"] = np.where(
data["Client_count"] == 0,
0,
data["Clients_individual"] / data["Client_count"]

)

-----------------------------

Sidebar controls

-----------------------------

st.sidebar.header("Ranking Controls")

core_weights = {
"Advisor_Tenure": st.sidebar.slider("Weight: Advisor_Tenure", 0.0, 100.0, 6.67),
"MD_count": st.sidebar.slider("Weight: MD count", 0.0, 100.0, 6.67),
"Client_count": st.sidebar.slider("Weight: Client Count", 0.0, 100.0, 6.67),
"Ind_ratio": st.sidebar.slider("Weight: percentage ofClients with 1 on 1 relation", 0.0, 100.0, 6.67),
"Total_RK_Assets": st.sidebar.slider("Weight: Total RK Assets", 0.0, 100.0, 6.67),
"Total_AUM": st.sidebar.slider("Weight: Total AUM", 0.0, 100.0, 6.67),
"Total_FMF_Assets": st.sidebar.slider("Weight: Total FMF Assets", 0.0, 100.0, 6.67),
"Recency_days": st.sidebar.slider("Weight: Recency", 0.0, 100.0, 6.67),
"Frequency_2024": st.sidebar.slider("Weight: New Clients 2024", 0.0, 100.0, 6.67),
"Frequency_2025": st.sidebar.slider("Weight: New Clients 2025", 0.0, 100.0, 6.67),
"External_assets": st.sidebar.slider("Weight: External Assets", 0.0, 100.0, 6.67),
"Pct_external": st.sidebar.slider("Weight: percentage share of external assets", 0.0, 100.0, 6.67),
"Products_per_client": st.sidebar.slider("Weight: Products_per_client", 0.0, 100.0, 6.67),

"NPS_N": st.sidebar.slider("Weight: NPS", 0.0, 100.0, 6.67),
"Attrition_rate_N": st.sidebar.slider("Weight: Attrition_rate_N", 0.0, 100.0, 6.67),

}

Normalize weights

total_wt = sum(core_weights.values())
if total_wt == 0: total_wt = 1
norm_weights = {k: v / total_wt for k, v in core_weights.items()}

-----------------------------

Extra Filters

-----------------------------

rank_df = data.copy()

st.sidebar.header("Extra Filters")
toggle_val = st.sidebar.radio("Concierge_Flag", ("All", "1 only", "0 only"))

if toggle_val == "1 only":
rank_df = rank_df[rank_df["Concierge_Flag"] == 1]
elif toggle_val == "0 only":
rank_df = rank_df[rank_df["Concierge_Flag"] == 0]

from sklearn.preprocessing import MinMaxScaler

Identify numeric columns to scale (excluding 'Advisor' and non-numeric ones)

numeric_cols = rank_df.select_dtypes(include=[np.number]).columns

rank_df["NPS_N"] = pd.to_numeric(rank_df["NPS_N"], errors="coerce").fillna(0)

Define columns to scale

columns_to_scale = [
"Advisor_Tenure","MD_count","Client_count","Ind_ratio","Total_RK_Assets", "Total_AUM","Total_FMF_Assets","Recency_days","Frequency_2024","Frequency_2025","External_assets","Pct_external","Products_per_client","NPS_N","Attrition_rate_N"

]

Define columns with negative correlation

negatively_correlated = ["Recency_days"]

Replace non-numeric values with 0 in all columns used for scoring

for col in core_weights.keys():
if col in rank_df.columns:
rank_df[col] = pd.to_numeric(rank_df[col], errors='coerce').fillna(0)

Apply Min-Max scaling only to selected columns

scaler = MinMaxScaler()
rank_df[columns_to_scale] = scaler.fit_transform(rank_df[columns_to_scale])

Reverse scaling for negatively correlated columns

for col in negatively_correlated:
if col in rank_df.columns:
rank_df[col] = 1 - rank_df[col]

Normalize weights to sum to 1

total_wt = sum(core_weights.values())
if total_wt == 0: total_wt = 1
norm_weights = {k: v / total_wt for k, v in core_weights.items()}

rank_df["count"] = rank_df["NPS_N"].apply(lambda x: 15 if x > 0 else 14)

Compute score using normalized weights and scaled features

rank_df["Score"] = rank_df.apply(
lambda row: sum(row[col] * norm_weights.get(col, 0) for col in norm_weights),
axis=1
)
rank_df["Final_Score"] = rank_df["Score"] / rank_df["count"]

data["Score"] = rank_df["Final_Score"]

Sort by Score

plot_df = data.sort_values(by="Score", ascending=False)

col1, col2, col3 = st.columns(3)

col1.metric("Total Advisors", len(plot_df))
col2.metric("Total Clients", int(plot_df["Client_count"].sum()))
col3.metric("Total Assets (M)", f"${plot_df['Total_assets_not_used'].sum()/1e6:.1f}M")

plot_df["Rank"] = plot_df["Score"].rank(method="min", ascending=False).astype(int)

display_columns = {
"Advisor_not_used": "Advisor Name",
"Advisor_IP_ID_not_used": "Advisor ID",
"Score": "Score",
"Client_count": "Client Count",
"Total_AUM": "Total AUM",
"External_assets": "External Assets",
"Rank": "Rank"
}

-----------------------------

Ranked Advisors Table

-----------------------------

st.subheader("Ranked Advisors")
st.dataframe(
plot_df.sort_values(by="Score", ascending=False)[list(display_columns.keys())]
.rename(columns=display_columns)
)

-----------------------------/////////////////////////////////////////////////////////////////////////////////////////////////////////////

Advisor Details

-----------------------------

st.subheader("Advisor Details")

Sort plot_df by Final_Score before populating the dropdown

sorted_plot_df = plot_df.sort_values(by="Score", ascending=False)

selected = st.selectbox("Select an advisor", sorted_plot_df["Advisor_not_used"])

if selected:
adv = plot_df[plot_df["Advisor_not_used"] == selected].iloc[0]
adv_scaled = rank_df[rank_df["Advisor_not_used"] == selected].iloc[0]

st.write(f"### {adv['Advisor_not_used']} - Final Score: {adv['Score']:.2f}")

import plotly.express as px

  
  
  Feature contributions


contributions = {}

for feature in norm_weights:

    if feature in adv_scaled:

        value = adv_scaled[feature]

        weight = norm_weights[feature]

        contribution = value * weight

        contributions[feature] = contribution

total_score = adv_scaled["Score"]

contribution_percentages = {

    k: (v / total_score) * 100 for k, v in contributions.items()

}

contrib_df = pd.DataFrame({

    "Feature": list(contribution_percentages.keys()),

    "Contribution (%)": [round(v, 1) for v in contribution_percentages.values()]

}).sort_values(by="Contribution (%)", ascending=False)

  
  
  Horizontal bar chart


bar_fig = px.bar(

    contrib_df,

    y="Feature",

    x="Contribution (%)",

    orientation='h',

    title=f"Feature Contribution to Score for {adv['Advisor_not_used']}",

    text=contrib_df["Contribution (%)"].apply(lambda x: f"{x}%"),

    labels={"Contribution (%)": "Contribution to Score (%)"}

)

st.plotly_chart(bar_fig, use_container_width=True)

  
  
  --- Pie Charts ---


col1, col2 = st.columns(2)

with col1:

    fig1, ax1 = plt.subplots()

    ax1.pie([adv["Total_AUM"], adv["External_assets"]],

            labels=["Our AUM", "External"],

            autopct="%1.1f%%", startangle=90, colors=["#4C72B0", "#55A868"])

    ax1.set_title("Assets Split")

    st.pyplot(fig1)

with col2:

    fig2, ax2 = plt.subplots()

    ax2.pie([adv["Clients_individual"], adv["Clients_team_not_used"]],

            labels=["Individual", "Team"],

            autopct="%1.1f%%", startangle=90, colors=["#FF6F61", "#6A5ACD"])

    ax2.set_title("Client Split")

    st.pyplot(fig2)

  
  
  --- Additional Metrics ---


st.write("Additional Metrics:")

st.write(f"- External Assets Share: {adv['Pct_external']:.2%}")

st.write(f"- Total AUM: ${adv['Total_AUM']:.0f}")

st.write(f"- Products per Client: {round(adv['Products_per_client'], 2)}")

st.write(f"- Rank: {adv['Rank']}")

--- Matplotlib Box Plot ---

st.subheader("Advisor Final Score Distribution (Box Plot)")
fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(x=plot_df["Score"], ax=ax)
ax.set_title("Advisor Score Distribution")
ax.set_xlabel(" Score")
st.pyplot(fig)

--- Dynamic Percentage Inputs ---

st.subheader("Set Custom Class Thresholds (as Percentiles)")
pct_q1 = st.slider("Q1 upper bound (%)", min_value=0, max_value=100, value=25)
pct_q3 = st.slider("Q3 lower bound (%)", min_value=pct_q1, max_value=100, value=75)

--- Convert Percentiles to Score Thresholds ---

score_q1 = plot_df['Score'].quantile(pct_q1 / 100)
score_q3 = plot_df['Score'].quantile(pct_q3 / 100)

--- Classification Based on Thresholds ---

def classify_custom(score):
if score < score_q1:
return "Q1 (Lowest)"
elif score_q1 <= score <= score_q3:
return "Q2 (Middle)"
else:
return "Q3 (Highest)"

plot_df["Class"] = plot_df["Score"].apply(classify_custom)

--- Summary Stats by Class ---

summary = plot_df.groupby("Class").agg({
"Advisor_not_used": "count",
"Client_count": "sum",
"Total_AUM": "sum",
"Total_assets_not_used": "sum",
"External_assets": "mean",
"Products_per_client": "mean"
}).rename(columns={
"Advisor_not_used": "Advisor Count",
"Client_count": "Total Clients",
"Total_AUM": "Total AUM",
"Total_assets_not_used": "Total Assets",
"External_assets": "Avg External Assets",
"Products_per_client": "Avg Products per Client"
}).reset_index()

st.subheader("Class Summary")
st.dataframe(summary)

--- Advisor Lists by Class ---

for cls in ["Q1 (Lowest)", "Q2 (Middle)", "Q3 (Highest)"]:
st.markdown(f"### {cls} Advisors")
q_df = plot_df[plot_df["Class"] == cls]
st.write(q_df[[
"Advisor_not_used", "Score", "Client_count", "Total_AUM",
"Total_assets_not_used", "External_assets", "Products_per_client"
]].rename(columns={
"Advisor_not_used": "Advisor",
"Score": "Score",
"Total_assets_not_used": "Total Assets"
}))

--- Histogram of Recency ---

fig, ax = plt.subplots(figsize=(7, 4))
ax.hist(rank_df["Recency_days"], bins=10, color="#55A868", alpha=0.8, edgecolor='black')
ax.set_title("Distribution of Recency Across Advisors (Days)")
ax.set_xlabel("Days since last onboarding")
ax.set_ylabel("Number of Advisors")
st.pyplot(fig)

--- AUM-Based Filtering ---

st.subheader("Filter Advisors by AUM Range")

aum_min = st.number_input("Minimum AUM ($)", min_value=0, value=0, step=100000)
aum_max = st.number_input("Maximum AUM ($)", min_value=0, value=int(data["Total_AUM"].max()), step=100000)

apply_aum_filter = st.checkbox("Apply AUM Filter")

if apply_aum_filter:
plot_df = plot_df[
(plot_df["Total_AUM"] >= aum_min) &
(plot_df["Total_AUM"] <= aum_max)
]