import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import statsmodels.api as sm
import pandas as pd
import os
# Load the raw export (presumably one row per hit within a session — confirm
# against the data dictionary).
df = pd.read_csv('CES .csv')

# Per-row fields carried into each session's record list.
columns = ['SESSION_CLK_SEQ_NBR', 'PAGENAME', 'SHORTINTERACTION', 'VSCHANNEL', 'OS', 'MOBILE_DEVICE', 'SEC', 'TRANSACTION_START', 'TRANSACTION_COMPLETE']

# Collapse every (SESSION_ID, EASE_OF_USE) group into one row holding a list
# of per-row dicts.
result = (
    df.groupby(['SESSION_ID', 'EASE_OF_USE'])
    .apply(lambda grp: grp[columns].to_dict('records'))
    .reset_index(name='list_of_dicts')
)
print(result)

# Keep only sessions containing at least one record that starts the target
# transaction. The .copy() makes ndf an independent frame, so the feature
# columns assigned to it later do not write into a slice of `result`.
starts_txn = result['list_of_dicts'].apply(
    lambda recs: any(d['TRANSACTION_START'] == 'dc change contribution/deferral' for d in recs)
)
ndf = result[starts_txn].copy()
# A bare `ndf.shape` only displays in a notebook; print it so scripts show it too.
print(ndf.shape)
# Count total sessions and hits on the first step
def total_page_views(
    list_of_dicts,
    pagename='netbenefits|dc|investments|contributions::contribution amount per pay period',
):
    """Count the records of one session whose PAGENAME equals ``pagename``.

    Args:
        list_of_dicts: Iterable of per-hit dicts for a single session.
        pagename: Page name to count. Defaults to the contribution-amount
            page, so existing one-argument callers behave as before.

    Returns:
        Number of matching records; 0 for an empty session.
    """
    return sum(1 for rec in list_of_dicts if rec.get('PAGENAME') == pagename)
# Per-session number of hits on the page counted by total_page_views.
ndf['total_page_views'] = ndf['list_of_dicts'].map(total_page_views)

# Number of sessions that hit that page exactly once.
viewed_once = ndf['total_page_views'] == 1
count = len(ndf[viewed_once])
print("total_page_views:", count)
# Count hits on the review-and-submit step
def total_page_views(
    list_of_dicts,
    pagename='netbenefits|dc|investments|contributions::review and submit contribution amount',
):
    """Count the records of one session whose PAGENAME equals ``pagename``.

    Args:
        list_of_dicts: Iterable of per-hit dicts for a single session.
        pagename: Page name to count. Defaults to the review-and-submit
            page, so existing one-argument callers behave as before.

    Returns:
        Number of matching records; 0 for an empty session.
    """
    return sum(1 for rec in list_of_dicts if rec.get('PAGENAME') == pagename)
# NOTE(review): `ndf_1` is never assigned in the visible part of this script,
# so this line raises NameError unless it is defined elsewhere — confirm which
# frame was intended (possibly a filtered copy of `ndf`).
ndf_1['total_page_views'] = ndf_1['list_of_dicts'].apply(total_page_views)
# Visited the customer service page while the transaction was live
def check_CS_Page(list_of_dicts):
    """Return 1 if the customer-service page was hit while a transaction was open.

    A transaction is open from a record whose TRANSACTION_START is the
    contribution/deferral change until a record whose TRANSACTION_COMPLETE
    is. Start and complete records are never tested for the page.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 if a matching page hit occurs inside an open transaction, else 0.
    """
    txn = 'dc change contribution/deferral'
    target = 'netbenefits|utility bar|customer service|customer service'
    in_txn = False
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            in_txn = True
        elif rec.get('TRANSACTION_COMPLETE') == txn:
            in_txn = False
        elif in_txn and rec.get('PAGENAME') == target:
            # The first hit decides the flag; no need to scan the rest.
            return 1
    return 0
# Run check_CS_Page on every session's record list and keep the result as a column.
ndf['1_CS_Page_Visit'] = ndf['list_of_dicts'].map(check_CS_Page)

# Visited Search page while the transaction was live
def check_Search(list_of_dicts):
    """Return 1 if the search-results page was hit while a transaction was open.

    A transaction is open from a record whose TRANSACTION_START is the
    contribution/deferral change until a record whose TRANSACTION_COMPLETE
    is. Start and complete records are never tested for the page.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 if a matching page hit occurs inside an open transaction, else 0.
    """
    txn = 'dc change contribution/deferral'
    target = 'netbenefits|search|search|search results'
    in_txn = False
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            in_txn = True
        elif rec.get('TRANSACTION_COMPLETE') == txn:
            in_txn = False
        elif in_txn and rec.get('PAGENAME') == target:
            # The first hit decides the flag; no need to scan the rest.
            return 1
    return 0
# Run check_Search on every session's record list and keep the result as a column.
ndf['2_Search'] = ndf['list_of_dicts'].map(check_Search)
# Did a VA chat while the transaction was live
def VA_Chat(list_of_dicts):
    """Return 1 if a VA chat hit occurred while a transaction was open.

    A record counts as a VA chat hit when its PAGENAME is
    '/mybenefits/core/pages/omni' or its SHORTINTERACTION is
    'va_responseoptionclicked'. A transaction is open from a
    TRANSACTION_START record for the contribution/deferral change until the
    matching TRANSACTION_COMPLETE record; those two records are never tested.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 if a VA chat hit occurs inside an open transaction, else 0.
    """
    txn = 'dc change contribution/deferral'
    in_txn = False
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            in_txn = True
        elif rec.get('TRANSACTION_COMPLETE') == txn:
            in_txn = False
        elif in_txn and (
            rec.get('PAGENAME') == '/mybenefits/core/pages/omni'
            or rec.get('SHORTINTERACTION') == 'va_responseoptionclicked'
        ):
            # The first hit decides the flag; no need to scan the rest.
            return 1
    return 0
# Run VA_Chat on every session's record list and keep the result as a column.
ndf['3_VA_Chat'] = ndf['list_of_dicts'].map(VA_Chat)
# Incorrect entry error
def Incorrect_Entry_Error(list_of_dicts):
    """Count contribution-amount page hits immediately followed by its error page.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        Number of adjacent record pairs where the first PAGENAME is the
        contribution-amount page and the next is the same page's
        '(error)' variant. 0 for sessions with fewer than two records.
    """
    entry_page = 'netbenefits|dc|investments|contributions::contribution amount per pay period'
    error_page = 'netbenefits|dc|investments|contributions::contribution amount per pay period (error)'
    pages = [rec.get('PAGENAME') for rec in list_of_dicts]
    # zip against the list shifted by one yields every adjacent pair.
    return sum(
        1
        for current, following in zip(pages, pages[1:])
        if current == entry_page and following == error_page
    )
# Run Incorrect_Entry_Error on every session's record list and keep the result as a column.
ndf['4_Incorrect_Entry_Error'] = ndf['list_of_dicts'].map(Incorrect_Entry_Error)
# Server error
def Server_Error(list_of_dicts):
    """Count hits on the contributions error page within one session.

    Args:
        list_of_dicts: Iterable of per-hit dicts for a single session.

    Returns:
        Number of records whose PAGENAME is
        'netbenefits|dc|investments|contributions::error'.
    """
    error_page = 'netbenefits|dc|investments|contributions::error'
    return sum(1 for rec in list_of_dicts if rec.get('PAGENAME') == error_page)
# Run Server_Error on every session's record list and keep the result as a column.
ndf['5_Server_Error'] = ndf['list_of_dicts'].map(Server_Error)
# Take-home pay calculator
def Tool_Take_Home_Pay(list_of_dicts):
    """Return 1 if the take-home-pay calculator was hit while a transaction was open.

    A transaction is open from a record whose TRANSACTION_START is the
    contribution/deferral change until a record whose TRANSACTION_COMPLETE
    is. Start and complete records are never tested for the page.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 if a matching page hit occurs inside an open transaction, else 0.
    """
    txn = 'dc change contribution/deferral'
    target = 'netbenefits|small tools|take home pay calculator|take home pay calculator'
    in_txn = False
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            in_txn = True
        elif rec.get('TRANSACTION_COMPLETE') == txn:
            in_txn = False
        elif in_txn and rec.get('PAGENAME') == target:
            # The first hit decides the flag; no need to scan the rest.
            return 1
    return 0
# Run Tool_Take_Home_Pay on every session's record list and keep the result as a column.
ndf['6_Tool_Pay_Home'] = ndf['list_of_dicts'].map(Tool_Take_Home_Pay)
# Contribution calculator
def Tool_Contribution_Calc(list_of_dicts):
    """Return 1 if the contribution calculator was hit while a transaction was open.

    A transaction is open from a record whose TRANSACTION_START is the
    contribution/deferral change until a record whose TRANSACTION_COMPLETE
    is. Start and complete records are never tested for the page.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 if a matching page hit occurs inside an open transaction, else 0.
    """
    txn = 'dc change contribution/deferral'
    target = '/mybenefits/employerservices/navigation/es2/contributioncalculator'
    in_txn = False
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            in_txn = True
        elif rec.get('TRANSACTION_COMPLETE') == txn:
            in_txn = False
        elif in_txn and rec.get('PAGENAME') == target:
            # The first hit decides the flag; no need to scan the rest.
            return 1
    return 0
# Run Tool_Contribution_Calc on every session's record list and keep the result as a column.
ndf['7_Tool_Contribution_Calc'] = ndf['list_of_dicts'].map(Tool_Contribution_Calc)
# Accessed the Learn Hub while the transaction was live
def Access_Learn_Hub(list_of_dicts):
    """Return 1 if a tracked learn article was hit while a transaction was open.

    The tracked articles are the 'rothcontributions' and 'irslimits' learn
    pages. A transaction is open from a TRANSACTION_START record for the
    contribution/deferral change until the matching TRANSACTION_COMPLETE
    record; those two records are never tested for the page.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 if a tracked article hit occurs inside an open transaction, else 0.
    """
    txn = 'dc change contribution/deferral'
    articles = (
        'netbenefits|learn|learn article|rothcontributions',
        'netbenefits|learn|learn article|irslimits',
    )
    in_txn = False
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            in_txn = True
        elif rec.get('TRANSACTION_COMPLETE') == txn:
            in_txn = False
        elif in_txn and rec.get('PAGENAME') in articles:
            # The first hit decides the flag; no need to scan the rest.
            return 1
    return 0
# Run Access_Learn_Hub on every session's record list and keep the result as a column.
ndf['8_Access_Learn_Hub'] = ndf['list_of_dicts'].map(Access_Learn_Hub)
# Cancellation after reaching the review-and-submit step
def Cancelation_Post_Submission(list_of_dicts, max_gap=3):
    """Count review-page hits followed shortly by the cancel page.

    A review-and-submit hit is counted once when the cancel-changes page
    appears among the next ``max_gap`` records. Each review hit is judged
    on its own, so two review hits ahead of one cancel count twice.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.
        max_gap: How many following records to inspect after each review
            hit. Defaults to 3, the window the original code hard-coded.

    Returns:
        Number of review hits with a cancel hit inside the window.
    """
    review_page = 'netbenefits|dc|investments|contributions::review and submit contribution amount'
    cancel_page = 'netbenefits|dc|investments|contributions::cancel changes to contribution amount'
    pages = [rec.get('PAGENAME') for rec in list_of_dicts]
    # Slicing clips at the end of the list, so no explicit bounds checks are needed.
    return sum(
        1
        for idx, page in enumerate(pages)
        if page == review_page and cancel_page in pages[idx + 1:idx + 1 + max_gap]
    )
# Run Cancelation_Post_Submission on every session's record list and keep the result as a column.
ndf['9_Cancelation_Post_Submission'] = ndf['list_of_dicts'].map(Cancelation_Post_Submission)
# Transaction abandonment without cancellation
def Abandonment(list_of_dicts):
    """Count review-page hits that no later transaction completion offsets.

    Every hit on the review-and-submit page adds one; every record whose
    TRANSACTION_COMPLETE is the contribution/deferral change removes one.
    The running count is clamped at zero as each completion is processed,
    so a completion with nothing outstanding can neither drive the result
    negative nor cancel a review hit that happens afterwards.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        Non-negative number of review hits left without a completion.
    """
    txn = 'dc change contribution/deferral'
    review_page = 'netbenefits|dc|investments|contributions::review and submit contribution amount'
    outstanding = 0
    for rec in list_of_dicts:
        if rec.get('PAGENAME') == review_page:
            outstanding += 1
        if rec.get('TRANSACTION_COMPLETE') == txn:
            # Clamp here: the original skipped its clamp on completion records.
            outstanding = max(outstanding - 1, 0)
    return outstanding
# Run Abandonment on every session's record list and keep the result as a column.
ndf['10_Abandonment'] = ndf['list_of_dicts'].map(Abandonment)
# Multiple transaction starts
def Multiple_Txn_Starts(list_of_dicts):
    """Count transaction starts that no later completion offsets.

    Each record whose TRANSACTION_START is the contribution/deferral change
    adds one; each record whose TRANSACTION_COMPLETE is removes one. The
    running count never drops below zero, so a completion with no start
    outstanding is ignored.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        Non-negative number of starts left without a completion.
    """
    txn = 'dc change contribution/deferral'
    outstanding = 0
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            outstanding += 1
        if rec.get('TRANSACTION_COMPLETE') == txn:
            outstanding = max(outstanding - 1, 0)
    return outstanding
# Run Multiple_Txn_Starts on every session's record list and keep the result as a column.
ndf['11_Multiple_Txn_Starts'] = ndf['list_of_dicts'].map(Multiple_Txn_Starts)
# Multiple steps in the transaction
def Multiple_Steps(list_of_dicts):
    """Return the step count of the first completed transaction, minus 3.

    The counter is reset to 1 on every record whose TRANSACTION_START is the
    contribution/deferral change. Each later record adds one unless it is a
    side activity (VA chat, error pages, customer service, cancel page,
    calculators, learn articles, search). The first counted record whose
    TRANSACTION_COMPLETE matches ends the scan.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        ``steps - 3`` at the first counted completion record (presumably 3
        is the expected number of steps — confirm); this can be negative.
        0 when no counted completion record is found.
    """
    txn = 'dc change contribution/deferral'
    # Pages that are side activities rather than steps of the transaction.
    side_pages = frozenset({
        '/mybenefits/core/pages/omni',  # VA interaction
        'netbenefits|dc|investments|contributions::error',  # server error
        'netbenefits|dc|investments|contributions::contribution amount per pay period (error)',  # incorrect entry
        'netbenefits|utility bar|customer service|customer service',  # customer service visit
        'netbenefits|dc|investments|contributions::cancel changes to contribution amount',  # cancellation
        'netbenefits|small tools|take home pay calculator|take home pay calculator',  # tool usage
        '/mybenefits/employerservices/navigation/es2/contributioncalculator',  # tool usage
        'netbenefits|learn|learn article|rothcontributions',  # Learn Hub usage
        'netbenefits|learn|learn article|irslimits',  # Learn Hub usage
        'netbenefits|search|search|search results',  # search usage
    })
    steps = 0
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            steps = 1
            continue
        if (
            rec.get('PAGENAME') in side_pages
            or rec.get('SHORTINTERACTION') == 'va_responseoptionclicked'
        ):
            continue
        steps += 1
        if rec.get('TRANSACTION_COMPLETE') == txn:
            return steps - 3
    return 0
# Run Multiple_Steps on every session's record list and keep the result as a column.
ndf['12_Multiple_Steps'] = ndf['list_of_dicts'].map(Multiple_Steps)
# Multiple steps in the transaction (repeated definition)
# NOTE(review): this repeats the Multiple_Steps definition that appears
# earlier in the file with the same logic; one of the two can be removed.
def Multiple_Steps(list_of_dicts):
    """Return the step count of the first completed transaction, minus 3.

    The counter is reset to 1 on every record whose TRANSACTION_START is the
    contribution/deferral change. Each later record adds one unless it is a
    side activity (VA chat, error pages, customer service, cancel page,
    calculators, learn articles, search). The first counted record whose
    TRANSACTION_COMPLETE matches ends the scan.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        ``steps - 3`` at the first counted completion record (presumably 3
        is the expected number of steps — confirm); this can be negative.
        0 when no counted completion record is found.
    """
    txn = 'dc change contribution/deferral'
    # Pages that are side activities rather than steps of the transaction.
    side_pages = frozenset({
        '/mybenefits/core/pages/omni',  # VA interaction
        'netbenefits|dc|investments|contributions::error',  # server error
        'netbenefits|dc|investments|contributions::contribution amount per pay period (error)',  # incorrect entry
        'netbenefits|utility bar|customer service|customer service',  # customer service visit
        'netbenefits|dc|investments|contributions::cancel changes to contribution amount',  # cancellation
        'netbenefits|small tools|take home pay calculator|take home pay calculator',  # tool usage
        '/mybenefits/employerservices/navigation/es2/contributioncalculator',  # tool usage
        'netbenefits|learn|learn article|rothcontributions',  # Learn Hub usage
        'netbenefits|learn|learn article|irslimits',  # Learn Hub usage
        'netbenefits|search|search|search results',  # search usage
    })
    steps = 0
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            steps = 1
            continue
        if (
            rec.get('PAGENAME') in side_pages
            or rec.get('SHORTINTERACTION') == 'va_responseoptionclicked'
        ):
            continue
        steps += 1
        if rec.get('TRANSACTION_COMPLETE') == txn:
            return steps - 3
    return 0
# Run Multiple_Steps on every session's record list and keep the result as a column.
ndf['12_Multiple_Steps'] = ndf['list_of_dicts'].map(Multiple_Steps)
# App accessed during the transaction
def App_Access(list_of_dicts):
    """Return 1 if an app page was hit while a transaction was open.

    An app page is any PAGENAME starting with 'nb app' or 'nbm'. A
    transaction is open from a TRANSACTION_START record for the
    contribution/deferral change until the matching TRANSACTION_COMPLETE
    record; those two records are never tested for the page.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 if an app page hit occurs inside an open transaction, else 0.
    """
    txn = 'dc change contribution/deferral'
    in_txn = False
    for rec in list_of_dicts:
        if rec.get('TRANSACTION_START') == txn:
            in_txn = True
        elif rec.get('TRANSACTION_COMPLETE') == txn:
            in_txn = False
        elif in_txn:
            pagename = rec.get('PAGENAME')
            # A missing CSV value arrives as float NaN (or None), not as an
            # absent key, so guard the type before calling str.startswith.
            if isinstance(pagename, str) and pagename.startswith(('nb app', 'nbm')):
                return 1
    return 0
# Run App_Access on every session's record list and keep the result as a column.
ndf['13_App_Access'] = ndf['list_of_dicts'].map(App_Access)
# Flag sessions with only one instance of the first transaction step (and no review step) so they can be removed
def single_start_instance(list_of_dicts):
    """Flag sessions that started the transaction once and never reached review.

    Args:
        list_of_dicts: Ordered per-hit dicts for a single session.

    Returns:
        1 when exactly one record is the contribution-amount page with
        TRANSACTION_START set to the contribution/deferral change and no
        record is the review-and-submit page; otherwise 0.
    """
    txn = 'dc change contribution/deferral'
    first_step = 'netbenefits|dc|investments|contributions::contribution amount per pay period'
    review_page = 'netbenefits|dc|investments|contributions::review and submit contribution amount'
    starts = 0
    for rec in list_of_dicts:
        page = rec.get('PAGENAME')
        if page == review_page:
            # Any review hit disqualifies the session, so stop scanning.
            return 0
        if page == first_step and rec.get('TRANSACTION_START') == txn:
            starts += 1
    return 1 if starts == 1 else 0
# Apply the function to the DataFrame
# Run single_start_instance on every session's record list and keep the result as a column.
ndf['single_start_instance'] = ndf['list_of_dicts'].map(single_start_instance)
# Build the regression frame from the engineered session frame. The original
# assigned into `df_reg` before it existed; copying also keeps `ndf` untouched.
df_reg = ndf.copy()

# Binary target: 1 when the session was rated 'Very difficult' or 'Difficult',
# 0 for every other rating (missing ratings included).
df_reg['Y'] = df_reg['EASE_OF_USE'].isin(['Very difficult', 'Difficult']).astype(int)

# Keep only the engineered features and the target, in a fixed column order.
df_reg = df_reg[['1_CS_Page_Visit', '2_Search', '3_VA_Chat', '4_Incorrect_Entry_Error', '5_Server_Error', '6_Tool_Pay_Home',
                 '7_Tool_Contribution_Calc', '8_Access_Learn_Hub', '9_Cancelation_Post_Submission', '10_Abandonment',
                 '11_Multiple_Txn_Starts', '12_Multiple_Steps', '13_App_Access', 'Y']]

y = df_reg['Y']

# Predictors are columns 1..10 of df_reg ('2_Search' .. '11_Multiple_Txn_Starts').
# NOTE(review): the original also built a hand-picked predictor list (without
# '6_Tool_Pay_Home', with '13_App_Access') and then overwrote it with this
# positional slice before fitting — confirm which set was intended.
x = df_reg.iloc[:, 1:11]
X = sm.add_constant(x)

# Logistic regression of the difficulty flag on the friction features.
model_sm = sm.Logit(y, X)
results = model_sm.fit()

# In-sample predicted probabilities (the model is scored on its training data).
y_pred_proba = results.predict(X)
y_true = y

# ROC curve with the AUC in the legend.
fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba)
auc = roc_auc_score(y_true, y_pred_proba)
plt.figure()
plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc)
plt.plot([0, 1], [0, 1], 'r--')  # Random-chance line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# Confusion matrix; the original used `cm` without ever computing it.
# NOTE(review): the 0.5 cut-off is an assumption — confirm the intended threshold.
y_pred = (y_pred_proba >= 0.5).astype(int)
cm = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt='g')
plt.xlabel('Predicted label')
plt.ylabel('Actual label')
plt.title('Confusion Matrix')
plt.show()

# confusion_matrix rows are actual classes and columns are predicted classes,
# so cm[0, 0]=TN, cm[0, 1]=FP, cm[1, 0]=FN, cm[1, 1]=TP.
FPR = cm[0, 1] / (cm[0, 1] + cm[0, 0])
print('False Positive Rate = ', FPR)
# FNR = FN / (FN + TP); the original divided TN by (FN + TN).
FNR = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print('False Negative Rate = ', FNR)
# NOTE: "Top comments (0)" was residue from the web page this script was copied from; it is not part of the script.