# Cloud-driven Loan Default predictor using machine learning
# Task 1. Launch an Amazon SageMaker instance
# Task 2. Upload the notebook into Jupyter
# Task 3. Data Loading
import boto3
import numpy as np
import pandas as pd
import sagemaker
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
# Location of the cleaned loan dataset in S3: bucket, key prefix, object name.
bucket_name, folder_name, file_name = (
    "loan-data",
    "loan_cleaned_data",
    "loan_cleaned_data.csv",
)
s3_url = f"s3://{bucket_name}/{folder_name}/{file_name}"

# IAM role attached to this SageMaker session; not used by the read below,
# but kept available for any later SageMaker API calls.
role = sagemaker.get_execution_role()

# Read the CSV directly from S3 into a DataFrame.
# NOTE(review): pandas resolves s3:// URLs through fsspec/s3fs — confirm it
# is installed in the notebook kernel.
df = pd.read_csv(filepath_or_buffer=s3_url)
# Task 4. Feature engineering (one-hot encoding)
# Replace the 'purpose' column with one indicator column per distinct value;
# dtype=int makes the indicators 0/1 integers instead of booleans.
df_encoded = pd.get_dummies(
    data=df,
    columns=['purpose'],
    dtype=int,
)
# Task 5. Data preprocessing (Handling imbalanced data)
# Class frequencies of the target before any resampling, kept for inspection.
class_counts = df_encoded['not_fully_paid'].value_counts()

# Split BEFORE oversampling. Resampling with replacement duplicates minority
# rows; if that happens before the split, copies of the same row end up in
# both the training and the test partition, the model is scored on rows it
# has already seen, and the evaluation report is inflated (data leakage).
X = df_encoded.drop(columns=['sl_no', 'not_fully_paid'])
y = df_encoded['not_fully_paid']
# stratify=y keeps the original class ratio in both partitions, so the test
# set reflects the real (imbalanced) distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)

# Oversample within the training partition only.
# NOTE(review): assumes label 0 is the more frequent class, as the variable
# names imply — confirm against class_counts.
train_frame = X_train.assign(not_fully_paid=y_train)
majority_class = train_frame[train_frame['not_fully_paid'] == 0]
minority_class = train_frame[train_frame['not_fully_paid'] == 1]
# Draw minority rows with replacement until both classes have equal size.
minority_unsample = resample(
    minority_class,
    replace=True,
    n_samples=len(majority_class),
    random_state=42,
)
df_balanced = pd.concat([majority_class, minority_unsample])

# Task 6. Model Training
# Train on the balanced training data; X_test / y_test stay untouched.
X_train = df_balanced.drop(columns=['not_fully_paid'])
y_train = df_balanced['not_fully_paid']
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Task 7. Model Evaluation
# Per-class precision / recall / F1 on held-out rows the model has not seen.
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred))
# This site is built on Heroku
# Join the ranks of developers at Salesforce, Airbase, DEV, and more who deploy their mission critical applications on Heroku. Sign up today and launch your first app!
# For further actions, you may consider blocking this person and/or reporting abuse
# Top comments (0)