<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Rupak Biswas</title>
    <description>The latest articles on DEV Community by Rupak Biswas (@rupak2001).</description>
    <link>https://dev.to/rupak2001</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F744879%2F000f5885-fea4-467e-af60-6ec9349ac10e.jpeg</url>
      <title>DEV Community: Rupak Biswas</title>
      <link>https://dev.to/rupak2001</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/rupak2001"/>
    <language>en</language>
    <item>
      <title>Demonstrating K means Clustering on Iris Dataset</title>
      <dc:creator>Rupak Biswas</dc:creator>
      <pubDate>Wed, 11 Oct 2023 18:14:09 +0000</pubDate>
      <link>https://dev.to/rupak2001/demonstrating-k-means-clustering-on-iris-dataset-4f3d</link>
      <guid>https://dev.to/rupak2001/demonstrating-k-means-clustering-on-iris-dataset-4f3d</guid>
      <description>&lt;p&gt;K-means clustering was performed to evaluate the possible clusters that can be derived from the features of the given dataset, hence giving an unsupervised model. The following explanatory variables were included as possible contributors to the K-means clustering model (output): the petal length &amp;amp; petal width.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Python Code&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;iris = load_iris(as_frame=True)
iris.data
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

X = iris.data
X_act = iris.data 
X = X.drop(['sepal length (cm)','sepal width (cm)'],axis=1)

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X[['petal length (cm)']])
X['Scaled_PL'] = scaler.transform(X[['petal length (cm)']])
scaler.fit(X[['petal width (cm)']])
X['Scaled_PW'] = scaler.transform(X[['petal width (cm)']])

X = X.drop(['petal length (cm)','petal width (cm)'],axis=1)

plt.scatter(X['Scaled_PL'],X['Scaled_PW'])
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;a href="https://res.cloudinary.com/practicaldev/image/fetch/s--U7BaP1wt--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/le0idcw6dsaybe0yabcz.png" class="article-body-image-wrapper"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--U7BaP1wt--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/le0idcw6dsaybe0yabcz.png" alt="Scatter Plot" width="800" height="565"&gt;&lt;/a&gt;&lt;br&gt;
&lt;em&gt;Scatter Plot Showing the possible cluster between petal length and petal width&lt;/em&gt;&lt;/p&gt;



&lt;p&gt;&lt;strong&gt;Output&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;from sklearn.cluster import KMeans
model = KMeans(n_clusters = 2)
model.fit(X)

predictions = model.predict(X)
X['clusters'] = predictions
X
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;a href="https://res.cloudinary.com/practicaldev/image/fetch/s--ACSgSw0s--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/gepwmo7havrnuxpxkk4n.png" class="article-body-image-wrapper"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--ACSgSw0s--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/gepwmo7havrnuxpxkk4n.png" alt="list" width="556" height="593"&gt;&lt;/a&gt;&lt;br&gt;
&lt;em&gt;Prediction shows cluster number (Here we have 2)&lt;/em&gt;&lt;/p&gt;




&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;cluster0 = X[['Scaled_PL','Scaled_PW']][X.clusters == 0]
cluster1 = X[['Scaled_PL','Scaled_PW']][X.clusters == 1]
centroids = model.cluster_centers_
plt.scatter(cluster0['Scaled_PL'],cluster0['Scaled_PW'],color="yellow",label="Cluster1")
plt.scatter(cluster1['Scaled_PL'],cluster1['Scaled_PW'],color="orange",label="Cluster2")
plt.scatter(centroids[:,0],centroids[:,1],marker="*",color="purple",label="centroid")
plt.xlabel("petal length")
plt.ylabel("petal width")
plt.legend()
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;p&gt;&lt;a href="https://res.cloudinary.com/practicaldev/image/fetch/s--5AAM_e5B--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/fmaxw3eyhpk0i7mzsyo4.png" class="article-body-image-wrapper"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--5AAM_e5B--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/fmaxw3eyhpk0i7mzsyo4.png" alt="predicted plot" width="800" height="536"&gt;&lt;/a&gt;&lt;br&gt;
&lt;em&gt;Scatter Plot representing the 2 predicted clusters along with their centroids&lt;/em&gt;&lt;/p&gt;



&lt;p&gt;&lt;strong&gt;Finding Elbow&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;SSE = []

for i in range(1,11):
    test_model = KMeans(n_clusters=i)
    test_model.fit(X[['Scaled_PL','Scaled_PW']])
    SSE.append(test_model.inertia_)

plt.plot(SSE)
plt.xlabel("K")
plt.ylabel("SSE")
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;a href="https://res.cloudinary.com/practicaldev/image/fetch/s--zvI9toFj--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/yq0jjk8wl60t136mvjkh.png" class="article-body-image-wrapper"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--zvI9toFj--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/yq0jjk8wl60t136mvjkh.png" alt="elbow plot" width="800" height="551"&gt;&lt;/a&gt;&lt;br&gt;
&lt;em&gt;Possible elbow found for the predicted Kmeans model&lt;/em&gt;&lt;/p&gt;




&lt;p&gt;Here we get the elbow of the curve in roughly the 1 to 5 range (no. of clusters). To get accurate predictions we should choose a K value between 1 and 5.&lt;/p&gt;

</description>
    </item>
    <item>
      <title>House price prediction using Lasso Regression</title>
      <dc:creator>Rupak Biswas</dc:creator>
      <pubDate>Fri, 06 Oct 2023 18:23:53 +0000</pubDate>
      <link>https://dev.to/rupak2001/house-price-prediction-using-lasso-regression-4lng</link>
      <guid>https://dev.to/rupak2001/house-price-prediction-using-lasso-regression-4lng</guid>
      <description>&lt;p&gt;Lasso Regression analysis was performed to evaluate the importance of a series of explanatory variables in predicting a probable answer, or price in this case. The following explanatory variables were included as possible contributors to a Lasso Regression evaluating the probable price of a house in Melbourne (output): the area, no. of rooms, landsize and many more.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;This is my copy of the Colab notebook so everyone can see the output along with the code&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;import pandas as pd
df = pd.read_csv("Melbourne_housing_FULL.csv")
df.nunique()

Suburb 351
Address 34009
Rooms 12
Type 3
Price 2871
Method 9
SellerG 388
Date 78
Distance 215
Postcode 211
Bedroom2 15
Bathroom 11
Car 15
Landsize 1684
BuildingArea 740
YearBuilt 160
CouncilArea 33
Lattitude 13402
Longtitude 14524
Regionname 8
Propertycount 342
dtype: int64

dfS = df[['Suburb', 'Rooms', 'Type', 'Method', 'SellerG', 'Regionname', 'Propertycount', 
               'Distance', 'CouncilArea', 'Bedroom2', 'Bathroom', 'Car', 'Landsize', 'BuildingArea', 'Price']]

Suburb Rooms Type Method SellerG Regionname Propertycount Distance CouncilArea Bedroom2 Bathroom
0 Abbotsford 2 h SS Jellis Northern
Metropolitan 4019.0 2.5 Yarra City Council 2.0 1.0
34857 rows × 15 columns

dfS
dfS.isna().sum()

Suburb 0
Rooms 0
Type 0
Method 0
SellerG 0
Regionname 3
Propertycount 3
Distance 1
CouncilArea 3


dfS[['Propertycount','Distance','Bedroom2','Bathroom','Car']] = dfS[['Propertycount','Distance','
dfS['Landsize']=dfS['Landsize'].fillna(dfS['Landsize'].mean())
dfS['BuildingArea']=dfS['BuildingArea'].fillna(dfS['BuildingArea'].mean())

dfS.dropna(inplace=True)
dfS = pd.get_dummies(dfS,drop_first=True)

X = dfS.drop 
Y= dfS['Price']

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2)
from sklearn.linear_model import Lasso #l1
CPU times: user 3 µs, sys: 2 µs, total: 5 µs
Wall time: 10.3 µs
▾ Lasso
Lasso(alpha=50, tol=0.1)
lasso = Lasso(alpha=50, max_iter=1000, tol=0.1)
lasso.fit(X_train,Y_train)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Output&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;predictions = lasso.predict(X_test)
predictions
array([1323721.23922339, 721160.34344916, 623689.80964616, ...,
 987946.0460597 , 983561.59313765, 160658.00272658])
lasso.score(X_test,Y_test)
0.6388165172009165

&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;As mentioned in the code from the Lasso Regression analysis, we get an overall accuracy of about 63% (as shown in the code)&lt;/strong&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Survival Prediction in Titanic Using Decision Tree</title>
      <dc:creator>Rupak Biswas</dc:creator>
      <pubDate>Thu, 05 Oct 2023 18:23:48 +0000</pubDate>
      <link>https://dev.to/rupak2001/survival-prediction-in-titanic-using-decision-tree-3c2h</link>
      <guid>https://dev.to/rupak2001/survival-prediction-in-titanic-using-decision-tree-3c2h</guid>
      <description>&lt;p&gt;Decision Tree analysis was performed to evaluate the importance of a series of explanatory variables in predicting a binary, categorical response variable. The following explanatory variables were included as possible contributors to a decision tree evaluating Survival of a person from the Titanic wreckage (output) includes the Passenger-class, Sex, Age, &amp;amp; Fare.&lt;/p&gt;

&lt;h2&gt;
  
  
  Python Code
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;This is my copy of the Colab notebook so everyone can see the output along with the code&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;import pandas as pd
df = pd.read_csv('titanic.csv')

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1
Cumings, Mrs. John
Bradley (Florence Briggs
Th...
female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2.
3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques
Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

df.head(5)
X=df[['Pclass','Sex','Age','Fare','Survived']]
from sklearn.preprocessing import LabelEncoder
 X.Sex = le.fit_transform(X.Sex)

Pclass Sex Age Fare Survived
0 3 1 22.0 7.2500 0
1 1 0 38.0 71.2833 1
2 3 0 26.0 7.9250 1
3 1 0 35.0 53.1000 1
4 3 1 35.0 8.0500 0
... ... ... ... ... ...
886 2 1 27.0 13.0000 0
887 1 0 19.0 30.0000 1
888 3 0 NaN 23.4500 0
889 1 1 26.0 30.0000 1
890 3 1 32.0 7.7500 0
891 rows × 5 columns

le = LabelEncoder()
X.Sex = le.fit_transform(X.Sex)
X
X = X.dropna()
Y = X['Survived']
X = X.drop('Survived',axis='columns')
Y

0 0
1 1
2 1
3 1
4 0
 ..
885 0
886 0

from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,_Y_test = train_test_split(X,Y,test_size=0.2)
from sklearn import tree
# from sklearn.linear_model import LogisticRegression
▾ DecisionTreeClassifier
DecisionTreeClassifier()
model = tree.DecisionTreeClassifier()
# model = LogisticRegression()
model.fit(X_train,Y_train)
predictions = model.predict(X_test)
predictions

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1,
 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0,
 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1,
 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1])
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Output &amp;amp; Accuracy
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;model.score(X_test,_Y_test)
0.7482517482517482

model.predict([[1,0,39,71.2833]])
[1]

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(_Y_test,predictions)
import seaborn as sn
sn.heatmap(cm,annot=True)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;a href="https://res.cloudinary.com/practicaldev/image/fetch/s--HBNR5pjI--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/4b8vgx0i5k6x37cajvqk.png" class="article-body-image-wrapper"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--HBNR5pjI--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/4b8vgx0i5k6x37cajvqk.png" alt="Confusion matrix of decision tree" width="800" height="584"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;As mentioned in the code from the decision tree classifier, We get an overall accuracy of about 75% (as shown in code)&lt;/em&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Demonstrating Random Forest by using IRIS dataset</title>
      <dc:creator>Rupak Biswas</dc:creator>
      <pubDate>Tue, 03 Oct 2023 18:35:18 +0000</pubDate>
      <link>https://dev.to/rupak2001/demonstrating-random-forest-by-using-iris-dataset-2n31</link>
      <guid>https://dev.to/rupak2001/demonstrating-random-forest-by-using-iris-dataset-2n31</guid>
      <description>&lt;p&gt;Random forest analysis was performed to evaluate the importance of a series of explanatory variables in predicting a binary, categorical response variable. The following explanatory variables were included as possible contributors to a random forest evaluating Flower type (output) includes the petal length, petal width, sepal length and sepal width.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;This is my copy of the Colab notebook so everyone can see the output along with the code&lt;/strong&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  Python Code:
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
iris = load_iris()
classes = iris['target_names']
classes
array(['setosa', 'versicolor', 'virginica'], dtype='&amp;lt;U10')
X=iris['data']
Y=iris['target']
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import RandomForestClassifier
▾ RandomForestClassifier
RandomForestClassifier(n_estimators=50)
model = RandomForestClassifier(n_estimators=50)
model.fit(X_train,Y_train)
model.score(X_test,Y_test)
0.9555555555555556
Output
print("actual result:",classes[Y_test[2]])
print("predicted result:",classes[model.predict([X_test[2]])[0]])
actual result: virginica
predicted result: virginica
predictions = model.predict(X_test)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test,predictions)
import seaborn as sn
sn.heatmap(cm,annot=True)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;strong&gt;Confusion Matrix Created based on predictions done on test dataset&lt;/strong&gt;&lt;br&gt;
&lt;em&gt;X axis represents actual values &amp;amp; Y_axis represents predicted values&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Output and accuracy&lt;/strong&gt;&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;model.score(X_test,Y_test)
0.9555555555555556
Output
print("actual result:",classes[Y_test[2]])
print("predicted result:",classes[model.predict([X_test[2]])[0]])
actual result: virginica
predicted result: virginica
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;&lt;a href="https://res.cloudinary.com/practicaldev/image/fetch/s--UeKCXXNo--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/9se7895ugp94dgxji56c.png" class="article-body-image-wrapper"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--UeKCXXNo--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_800/https://dev-to-uploads.s3.amazonaws.com/uploads/articles/9se7895ugp94dgxji56c.png" alt="Confusion Matrix Created based on predictions done on test dataset" width="800" height="696"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;As mentioned in the code from the random forest classifier, we get an overall accuracy of about 96% (as shown in the code)&lt;/strong&gt;&lt;/p&gt;

</description>
    </item>
  </channel>
</rss>
