ML libraries
-pandas
reading dataset
df=pd.read_csv("address")
df=pd.read_excel("address",sheet_name="sheet",usecols=['coloumn1'])
viewing dataset
print(df.head())
print(df.tail())
print(df.sample(5))
dataframe info
df.info() #gives full information
df.describe()  #gives statistics
df.shape
selecting columns
coloumn=df["coloumn_name"]
subset=df[["coloumn1","coloumn2"]]
add two compatible columns
df["new"]=df["col1"]+df["col2"]
filter rows
coloumn=df[df['coloumn']>10]
merge two dataframes
newdf=pd.merge(df1,df2,on="common_column")
missing values
-check
values=df.isnull().sum()
-fill with values
df['col']=df['col'].fillna(0)
-drop
df=df.dropna()
-count duplicate rows
df.duplicated().sum()
add new column
y=np.array([1,2,3,4])
df['col_new']=y
drop column
df.drop(columns=['col_name'],inplace=True)
iloc indexer
x=df.iloc[:,:-1]   #all but last column
x=df.iloc[:,-1]  #only last column
numpy
mean=np.mean(data)
st_dev=np.std(data)
median=np.median(data)
var=np.var(data)
    
Top comments (0)