YASHWANTH CHIKKI HD

Posted on Jan 30

hii

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import confusion_matrix

import seaborn as sns
import statsmodels.api as sm
import pandas as pd
import os
# Load the input CSV (the file name really does contain a space).
df = pd.read_csv('CES .csv')

# Per-event fields that are kept for every row of a session.
columns = ['SESSION_CLK_SEQ_NBR', 'PAGENAME', 'SHORTINTERACTION', 'VSCHANNEL', 'OS', 'MOBILE_DEVICE', 'SEC', 'TRANSACTION_START', 'TRANSACTION_COMPLETE']

# Collapse the rows of each (SESSION_ID, EASE_OF_USE) pair into one list of
# per-event dicts, stored in the 'list_of_dicts' column.
# NOTE(review): the per-session functions below depend on event order; this
# assumes the CSV rows are already in click order within each session.
# TODO confirm, otherwise sort by SESSION_CLK_SEQ_NBR first.
result = (
    df.groupby(['SESSION_ID', 'EASE_OF_USE'])
    .apply(lambda x: x[columns].to_dict('records'))
    .reset_index(name='list_of_dicts')
)

print(result)

# Keep only sessions that contain at least one start of the transaction.
has_txn_start = result['list_of_dicts'].apply(
    lambda events: any(d['TRANSACTION_START'] == 'dc change contribution/deferral' for d in events)
)
# .copy() makes ndf an independent frame, so the column assignments made
# later do not hit pandas' SettingWithCopyWarning.
ndf = result[has_txn_start].copy()
# A bare `ndf.shape` only displays in a notebook; print it so a script shows it too.
print(ndf.shape)

# Count total sessions and hits on the first step

def total_page_views(list_of_dicts):
    """Count the events in one session that are on the first transaction step.

    Args:
        list_of_dicts: List of per-event dicts for a single session.

    Returns:
        int: Number of events whose 'PAGENAME' is the
        'contribution amount per pay period' page.
    """
    first_step_page = 'netbenefits|dc|investments|contributions::contribution amount per pay period'
    return sum(1 for event in list_of_dicts if event.get('PAGENAME') == first_step_page)

# Per-session hit count for the first step, then the number of sessions
# that hit it exactly once.
ndf['total_page_views'] = ndf['list_of_dicts'].apply(total_page_views)
count = int((ndf['total_page_views'] == 1).sum())
print("total_page_views:", count)

# Count hits on the review-and-submit step

def total_page_views(list_of_dicts):
    """Count the events in one session that are on the review-and-submit step.

    This definition reuses the name ``total_page_views``; once it runs it
    replaces the earlier function of the same name that counts the
    'contribution amount per pay period' page.

    Args:
        list_of_dicts: List of per-event dicts for a single session.

    Returns:
        int: Number of events whose 'PAGENAME' is the
        'review and submit contribution amount' page.
    """
    review_page = 'netbenefits|dc|investments|contributions::review and submit contribution amount'
    return sum(1 for event in list_of_dicts if event.get('PAGENAME') == review_page)

# NOTE(review): `ndf_1` is not defined anywhere in the visible script, so this
# line raises NameError as written. It is presumably a filtered copy of `ndf`
# created in a cell that was not included; confirm before running.
ndf_1['total_page_views'] = ndf_1['list_of_dicts'].apply(total_page_views)

# Visited the customer service page while the transaction was live

def check_CS_Page(list_of_dicts):
    """Flag a customer-service page view made while a transaction is open.

    A transaction is open from an event whose 'TRANSACTION_START' is
    'dc change contribution/deferral' until an event whose
    'TRANSACTION_COMPLETE' has that value. The start and complete events
    themselves are never inspected for the page.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: 1 if the customer-service page was seen while open, else 0.
    """
    txn = 'dc change contribution/deferral'
    target_page = 'netbenefits|utility bar|customer service|customer service'

    in_transaction = False
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            in_transaction = True
            continue
        if event.get('TRANSACTION_COMPLETE') == txn:
            in_transaction = False
            continue
        if in_transaction and event.get('PAGENAME') == target_page:
            # The flag never resets, so the first hit decides the result.
            return 1
    return 0

# Store the 0/1 result of check_CS_Page for each session.
ndf['1_CS_Page_Visit'] = ndf['list_of_dicts'].apply(check_CS_Page)

# Visited Search page while the transaction was live

def check_Search(list_of_dicts):
    """Flag a search-results page view made while a transaction is open.

    A transaction is open from an event whose 'TRANSACTION_START' is
    'dc change contribution/deferral' until an event whose
    'TRANSACTION_COMPLETE' has that value. The start and complete events
    themselves are never inspected for the page.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: 1 if the search-results page was seen while open, else 0.
    """
    txn = 'dc change contribution/deferral'
    target_page = 'netbenefits|search|search|search results'

    in_transaction = False
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            in_transaction = True
            continue
        if event.get('TRANSACTION_COMPLETE') == txn:
            in_transaction = False
            continue
        if in_transaction and event.get('PAGENAME') == target_page:
            # The flag never resets, so the first hit decides the result.
            return 1
    return 0

# Store the 0/1 result of check_Search for each session.
ndf['2_Search'] = ndf['list_of_dicts'].apply(check_Search)

# Used the VA chat while the transaction was live

def VA_Chat(list_of_dicts):
    """Flag a VA chat interaction made while a transaction is open.

    An event counts when its 'PAGENAME' is '/mybenefits/core/pages/omni' or
    its 'SHORTINTERACTION' is 'va_responseoptionclicked'. A transaction is
    open from an event whose 'TRANSACTION_START' is
    'dc change contribution/deferral' until an event whose
    'TRANSACTION_COMPLETE' has that value; the start and complete events
    themselves are never inspected.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: 1 if a matching event was seen while open, else 0.
    """
    txn = 'dc change contribution/deferral'

    in_transaction = False
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            in_transaction = True
            continue
        if event.get('TRANSACTION_COMPLETE') == txn:
            in_transaction = False
            continue
        if not in_transaction:
            continue
        if (event.get('PAGENAME') == '/mybenefits/core/pages/omni'
                or event.get('SHORTINTERACTION') == 'va_responseoptionclicked'):
            # The flag never resets, so the first hit decides the result.
            return 1
    return 0

# Store the 0/1 result of VA_Chat for each session.
ndf['3_VA_Chat'] = ndf['list_of_dicts'].apply(VA_Chat)

# Incorrect entry error

def Incorrect_Entry_Error(list_of_dicts):
    """Count entry-page views that are immediately followed by the error page.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: Number of adjacent event pairs where the first 'PAGENAME' is the
        'contribution amount per pay period' page and the next one is its
        '(error)' variant.
    """
    entry_page = 'netbenefits|dc|investments|contributions::contribution amount per pay period'
    error_page = 'netbenefits|dc|investments|contributions::contribution amount per pay period (error)'

    # zip against the list shifted by one yields each consecutive pair.
    return sum(
        1
        for current, following in zip(list_of_dicts, list_of_dicts[1:])
        if current.get('PAGENAME') == entry_page
        and following.get('PAGENAME') == error_page
    )

# Store the count returned by Incorrect_Entry_Error for each session.
ndf['4_Incorrect_Entry_Error'] = ndf['list_of_dicts'].apply(Incorrect_Entry_Error)

# Server error

def Server_Error(list_of_dicts):
    """Count the events in one session that are on the contributions error page.

    Args:
        list_of_dicts: List of per-event dicts for a single session.

    Returns:
        int: Number of events whose 'PAGENAME' is
        'netbenefits|dc|investments|contributions::error'.
    """
    error_page = 'netbenefits|dc|investments|contributions::error'
    return sum(1 for event in list_of_dicts if event.get('PAGENAME') == error_page)
# Store the count returned by Server_Error for each session.
ndf['5_Server_Error'] = ndf['list_of_dicts'].apply(Server_Error)

# Used the take-home pay calculator while the transaction was live

def Tool_Take_Home_Pay(list_of_dicts):
    """Flag a take-home pay calculator view made while a transaction is open.

    A transaction is open from an event whose 'TRANSACTION_START' is
    'dc change contribution/deferral' until an event whose
    'TRANSACTION_COMPLETE' has that value. The start and complete events
    themselves are never inspected for the page.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: 1 if the calculator page was seen while open, else 0.
    """
    txn = 'dc change contribution/deferral'
    target_page = 'netbenefits|small tools|take home pay calculator|take home pay calculator'

    in_transaction = False
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            in_transaction = True
            continue
        if event.get('TRANSACTION_COMPLETE') == txn:
            in_transaction = False
            continue
        if in_transaction and event.get('PAGENAME') == target_page:
            # The flag never resets, so the first hit decides the result.
            return 1
    return 0

# Store the 0/1 result of Tool_Take_Home_Pay for each session.
ndf['6_Tool_Pay_Home'] = ndf['list_of_dicts'].apply(Tool_Take_Home_Pay)

# Used the contribution calculator while the transaction was live

def Tool_Contribution_Calc(list_of_dicts):
    """Flag a contribution calculator view made while a transaction is open.

    A transaction is open from an event whose 'TRANSACTION_START' is
    'dc change contribution/deferral' until an event whose
    'TRANSACTION_COMPLETE' has that value. The start and complete events
    themselves are never inspected for the page.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: 1 if the calculator page was seen while open, else 0.
    """
    txn = 'dc change contribution/deferral'
    target_page = '/mybenefits/employerservices/navigation/es2/contributioncalculator'

    in_transaction = False
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            in_transaction = True
            continue
        if event.get('TRANSACTION_COMPLETE') == txn:
            in_transaction = False
            continue
        if in_transaction and event.get('PAGENAME') == target_page:
            # The flag never resets, so the first hit decides the result.
            return 1
    return 0

# Store the 0/1 result of Tool_Contribution_Calc for each session.
ndf['7_Tool_Contribution_Calc'] = ndf['list_of_dicts'].apply(Tool_Contribution_Calc)

# Accessed the Learn Hub while the transaction was live

def Access_Learn_Hub(list_of_dicts):
    """Flag a Learn article view made while a transaction is open.

    Two article pages count: 'rothcontributions' and 'irslimits'. A
    transaction is open from an event whose 'TRANSACTION_START' is
    'dc change contribution/deferral' until an event whose
    'TRANSACTION_COMPLETE' has that value. The start and complete events
    themselves are never inspected for the page.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: 1 if either article page was seen while open, else 0.
    """
    txn = 'dc change contribution/deferral'
    article_pages = (
        'netbenefits|learn|learn article|rothcontributions',
        'netbenefits|learn|learn article|irslimits',
    )

    in_transaction = False
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            in_transaction = True
            continue
        if event.get('TRANSACTION_COMPLETE') == txn:
            in_transaction = False
            continue
        if in_transaction and event.get('PAGENAME') in article_pages:
            # The flag never resets, so the first hit decides the result.
            return 1
    return 0

# Store the 0/1 result of Access_Learn_Hub for each session.
ndf['8_Access_Learn_Hub'] = ndf['list_of_dicts'].apply(Access_Learn_Hub)

# Cancelation after reaching the review-and-submit step

def Cancelation_Post_Submission(list_of_dicts, lookahead=3):
    """Count review-and-submit views that are soon followed by a cancel page.

    Each review-and-submit event is counted at most once, when the
    'cancel changes to contribution amount' page appears among the next
    ``lookahead`` events.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.
        lookahead: How many following events to inspect after each
            review-and-submit event. Defaults to 3, the window the original
            hard-coded as three separate branches.

    Returns:
        int: Number of review-and-submit events followed by a cancel page
        within the window.
    """
    review_page = 'netbenefits|dc|investments|contributions::review and submit contribution amount'
    cancel_page = 'netbenefits|dc|investments|contributions::cancel changes to contribution amount'

    cancelations = 0
    for position, event in enumerate(list_of_dicts):
        if event.get('PAGENAME') != review_page:
            continue
        # Slicing past the end of the list is safe and simply yields fewer events.
        window = list_of_dicts[position + 1:position + 1 + lookahead]
        if any(later.get('PAGENAME') == cancel_page for later in window):
            cancelations += 1
    return cancelations

# Store the count returned by Cancelation_Post_Submission for each session.
ndf['9_Cancelation_Post_Submission'] = ndf['list_of_dicts'].apply(Cancelation_Post_Submission)

# Transaction abandonment without cancelation

def Abandonment(list_of_dicts):
    """Count review-and-submit views that have no matching completion.

    Every event on the review-and-submit page adds one and every event whose
    'TRANSACTION_COMPLETE' is 'dc change contribution/deferral' subtracts
    one. A single event can do both. The balance is floored at zero once,
    after all events have been processed.

    Args:
        list_of_dicts: List of per-event dicts for a single session.

    Returns:
        int: max(review views - completions, 0).
    """
    txn = 'dc change contribution/deferral'
    review_page = 'netbenefits|dc|investments|contributions::review and submit contribution amount'

    balance = 0
    for event in list_of_dicts:
        if event.get('PAGENAME') == review_page:
            balance += 1
        if event.get('TRANSACTION_COMPLETE') == txn:
            balance -= 1
    return max(balance, 0)

# Store the count returned by Abandonment for each session.
ndf['10_Abandonment'] = ndf['list_of_dicts'].apply(Abandonment)

# Multiple transaction starts

def Multiple_Txn_Starts(list_of_dicts):
    """Count transaction starts that have no matching completion.

    Every event whose 'TRANSACTION_START' is 'dc change contribution/deferral'
    adds one and every event whose 'TRANSACTION_COMPLETE' has that value
    subtracts one. A single event can do both. The balance is floored at
    zero once, after all events have been processed.

    Args:
        list_of_dicts: List of per-event dicts for a single session.

    Returns:
        int: max(starts - completions, 0).
    """
    txn = 'dc change contribution/deferral'

    balance = 0
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            balance += 1
        if event.get('TRANSACTION_COMPLETE') == txn:
            balance -= 1
    return max(balance, 0)

# Store the count returned by Multiple_Txn_Starts for each session.
ndf['11_Multiple_Txn_Starts'] = ndf['list_of_dicts'].apply(Multiple_Txn_Starts)

# Multiple steps in the transaction

def Multiple_Steps(list_of_dicts):
    """Return the number of counted events up to the first completion, minus 3.

    The counter is set to 1 on every event whose 'TRANSACTION_START' is
    'dc change contribution/deferral'. Events on one of the ignored pages
    (or with the VA 'SHORTINTERACTION') are skipped entirely, including the
    completion check. Every other event adds one, and the first such event
    whose 'TRANSACTION_COMPLETE' is the transaction name ends the scan.
    Events before any start are counted as well.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: ``counter - 3`` at the first counted completion event (this can
        be negative), or 0 if no counted completion event is found.
    """
    txn = 'dc change contribution/deferral'
    # Pages covered by other per-session features; they are not counted as steps.
    ignored_pages = (
        '/mybenefits/core/pages/omni',  # VA interaction
        'netbenefits|dc|investments|contributions::error',  # server error page
        'netbenefits|dc|investments|contributions::contribution amount per pay period (error)',  # entry error page
        'netbenefits|utility bar|customer service|customer service',  # CS page visit
        'netbenefits|dc|investments|contributions::cancel changes to contribution amount',  # cancelation
        'netbenefits|small tools|take home pay calculator|take home pay calculator',  # tool usage
        '/mybenefits/employerservices/navigation/es2/contributioncalculator',  # tool usage
        'netbenefits|learn|learn article|rothcontributions',  # Learn Hub usage
        'netbenefits|learn|learn article|irslimits',  # Learn Hub usage
        'netbenefits|search|search|search results',  # search usage
    )

    steps = 0
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            steps = 1
            continue
        if (event.get('PAGENAME') in ignored_pages
                or event.get('SHORTINTERACTION') == 'va_responseoptionclicked'):
            continue

        steps += 1
        if event.get('TRANSACTION_COMPLETE') == txn:
            # NOTE(review): 3 looks like a baseline step count for this
            # transaction; confirm, and decide whether negatives should be floored.
            return steps - 3

    return 0

# Store the value returned by Multiple_Steps for each session.
ndf['12_Multiple_Steps'] = ndf['list_of_dicts'].apply(Multiple_Steps)

# Multiple steps in the transaction (this section repeats the one above)

def Multiple_Steps(list_of_dicts):
    """Return the number of counted events up to the first completion, minus 3.

    This is a second definition of ``Multiple_Steps``; the script already
    defines a function with this name and the same logic earlier on.

    The counter is set to 1 on every event whose 'TRANSACTION_START' is
    'dc change contribution/deferral'. Events on one of the ignored pages
    (or with the VA 'SHORTINTERACTION') are skipped entirely, including the
    completion check. Every other event adds one, and the first such event
    whose 'TRANSACTION_COMPLETE' is the transaction name ends the scan.
    Events before any start are counted as well.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: ``counter - 3`` at the first counted completion event (this can
        be negative), or 0 if no counted completion event is found.
    """
    txn = 'dc change contribution/deferral'
    # Pages covered by other per-session features; they are not counted as steps.
    ignored_pages = (
        '/mybenefits/core/pages/omni',  # VA interaction
        'netbenefits|dc|investments|contributions::error',  # server error page
        'netbenefits|dc|investments|contributions::contribution amount per pay period (error)',  # entry error page
        'netbenefits|utility bar|customer service|customer service',  # CS page visit
        'netbenefits|dc|investments|contributions::cancel changes to contribution amount',  # cancelation
        'netbenefits|small tools|take home pay calculator|take home pay calculator',  # tool usage
        '/mybenefits/employerservices/navigation/es2/contributioncalculator',  # tool usage
        'netbenefits|learn|learn article|rothcontributions',  # Learn Hub usage
        'netbenefits|learn|learn article|irslimits',  # Learn Hub usage
        'netbenefits|search|search|search results',  # search usage
    )

    steps = 0
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            steps = 1
            continue
        if (event.get('PAGENAME') in ignored_pages
                or event.get('SHORTINTERACTION') == 'va_responseoptionclicked'):
            continue

        steps += 1
        if event.get('TRANSACTION_COMPLETE') == txn:
            # NOTE(review): 3 looks like a baseline step count for this
            # transaction; confirm, and decide whether negatives should be floored.
            return steps - 3

    return 0

# Same assignment as the earlier '12_Multiple_Steps' line; it overwrites that column.
ndf['12_Multiple_Steps'] = ndf['list_of_dicts'].apply(Multiple_Steps)

# App accessed during the transaction

def App_Access(list_of_dicts):
    """Flag an app page view made while a transaction is open.

    An event counts when its 'PAGENAME' is a string starting with 'nb app'
    or 'nbm'. A transaction is open from an event whose 'TRANSACTION_START'
    is 'dc change contribution/deferral' until an event whose
    'TRANSACTION_COMPLETE' has that value; the start and complete events
    themselves are never inspected.

    Args:
        list_of_dicts: List of per-event dicts for a single session, in order.

    Returns:
        int: 1 if a matching page was seen while open, else 0.
    """
    txn = 'dc change contribution/deferral'
    app_prefixes = ('nb app', 'nbm')

    in_transaction = False
    for event in list_of_dicts:
        if event.get('TRANSACTION_START') == txn:
            in_transaction = True
            continue
        if event.get('TRANSACTION_COMPLETE') == txn:
            in_transaction = False
            continue
        if not in_transaction:
            continue

        pagename = event.get('PAGENAME')
        # A missing CSV cell reaches here as NaN (a float) and a present key
        # can hold None; neither has .startswith, so only test real strings.
        if isinstance(pagename, str) and pagename.startswith(app_prefixes):
            return 1
    return 0

# Store the 0/1 result of App_Access for each session.
ndf['13_App_Access'] = ndf['list_of_dicts'].apply(App_Access)

# Flag sessions with exactly one transaction start on the first step and no review-and-submit page (candidates for removal)

def single_start_instance(list_of_dicts):
    """Flag sessions that started the transaction once and never reached review.

    Args:
        list_of_dicts: List of per-event dicts for a single session.

    Returns:
        int: 1 when exactly one event is both on the
        'contribution amount per pay period' page and a
        'dc change contribution/deferral' transaction start, and no event is
        on the 'review and submit contribution amount' page; otherwise 0.
    """
    txn = 'dc change contribution/deferral'
    first_step_page = 'netbenefits|dc|investments|contributions::contribution amount per pay period'
    review_page = 'netbenefits|dc|investments|contributions::review and submit contribution amount'

    starts_on_first_step = sum(
        1
        for event in list_of_dicts
        if event.get('PAGENAME') == first_step_page
        and event.get('TRANSACTION_START') == txn
    )
    review_views = sum(
        1 for event in list_of_dicts if event.get('PAGENAME') == review_page
    )

    return 1 if starts_on_first_step == 1 and review_views == 0 else 0

# Apply the function to the DataFrame

# Store the 0/1 result of single_start_instance for each session.
ndf['single_start_instance'] = ndf['list_of_dicts'].apply(single_start_instance)
# NOTE(review): the visible script uses `df_reg`, `y` and `cm` without ever
# defining them. They are reconstructed here so the section can run:
#   * df_reg starts as a copy of ndf. The original assigned an array built
#     from ndf['EASE_OF_USE'] straight into df_reg['Y'], which only works if
#     both frames have the same number of rows.
#   * y is the 'Y' column.
#   * cm is the confusion matrix at a 0.5 probability cut-off (assumed).
# Confirm each against the original notebook. The 'single_start_instance'
# flag is computed earlier in this script but is not used to filter rows here.
df_reg = ndf.copy()

# Binary target: 1 for 'Very difficult' / 'Difficult', 0 for every other answer.
df_reg['Y'] = df_reg['EASE_OF_USE'].isin(['Very difficult', 'Difficult']).astype(int)

df_reg = df_reg[['1_CS_Page_Visit', '2_Search', '3_VA_Chat', '4_Incorrect_Entry_Error', '5_Server_Error', '6_Tool_Pay_Home',
                 '7_Tool_Contribution_Calc', '8_Access_Learn_Hub', '9_Cancelation_Post_Submission', '10_Abandonment',
                 '11_Multiple_Txn_Starts', '12_Multiple_Steps', '13_App_Access', 'Y']]
y = df_reg['Y']

# NOTE(review): this named selection is overwritten a few lines below, so it
# never reaches the model. The model is fitted on the positional slice
# instead (columns '2_Search' through '11_Multiple_Txn_Starts'), which
# includes '6_Tool_Pay_Home' and drops '13_App_Access'. Decide which feature
# set is intended.
X = df_reg[['2_Search', '3_VA_Chat', '4_Incorrect_Entry_Error', '5_Server_Error',
            '7_Tool_Contribution_Calc', '8_Access_Learn_Hub', '9_Cancelation_Post_Submission', '10_Abandonment', '11_Multiple_Txn_Starts',
            '13_App_Access']]
x = df_reg.iloc[:, 1:11]  # Selects columns by index (1 to 10)
X = sm.add_constant(x)

# Fit the logistic regression and score the same rows it was fitted on.
model_sm = sm.Logit(y, X)
results = model_sm.fit()
y_pred_proba = results.predict(X)
y_true = y

# ROC curve with the AUC in the legend.
fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba)
plt.figure()
plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc_score(y_true, y_pred_proba))
plt.plot([0, 1], [0, 1], 'r--')  # Add the random chance line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# Confusion matrix of the thresholded predictions. labels=[0, 1] fixes the
# layout to [[TN, FP], [FN, TP]] even if one class is never predicted.
y_pred = (y_pred_proba >= 0.5).astype(int)
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt='g')
plt.xlabel('Predicted label')
plt.ylabel('Actual label')
plt.title('Confusion Matrix')
plt.show()

# FPR = FP / (FP + TN)
FPR = cm[0, 1] / (cm[0, 1] + cm[0, 0])
print('False Positive Rate = ', FPR)
# FNR = FN / (FN + TP). The original divided TN by (FN + TN), which is the
# negative predictive value, not the false negative rate.
FNR = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print('False Negative Rate = ', FNR)