Artificial Intelligence and Machine Learning
ISBN 9788119221196

Highlights

Notes

  

Chapter 5: Study of Dimension Reduction

Feature Selection ◀◀◀

import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Univariate feature selection: score each feature against the target
# with the chi-squared statistic and keep the k best.
# NOTE: chi2 requires non-negative feature values.
df = pd.read_csv("FeatureSelection.csv")
df.head()

# First five columns are the candidate features, last column is the target.
X = df.iloc[:, 0:5]
X.head()

y = df.iloc[:, -1]
y.head()

# Fit the selector; we only read the per-feature chi2 scores afterwards.
bestfeat = SelectKBest(score_func=chi2, k=4)
fit = bestfeat.fit(X, y)

# Pair each feature name with its chi2 score for easy inspection.
datascore = pd.DataFrame(fit.scores_)
datacol = pd.DataFrame(X.columns)
fscore = pd.concat([datacol, datascore], axis=1)
fscore.columns = ["best", "score"]
fscore

from sklearn.ensemble import ExtraTreesClassifier

import matplotlib.pyplot as pl

# Model-based feature ranking: an ensemble of randomized trees exposes
# impurity-based importances via feature_importances_.
model = ExtraTreesClassifier()
model.fit(X, y)
# REPL echo of the fitted estimator: ExtraTreesClassifier()

model.feature_importances_
# Example output (values vary run to run — the forest is randomized):
# array([0.29935847, 0.15434854, 0.14141038, 0.21183862, 0.19304398])

# Plot the importances, largest first, labelled by feature name.
featimport = pd.Series(model.feature_importances_, index=X.columns)
featimport.nlargest(5).plot(kind='bar')
pl.show()

# Seaborn -----> Heatmap ---->
# Visualize the pairwise feature correlation matrix; highly correlated
# pairs are candidates for dropping one of the two.
import seaborn as sns

# NOTE(review): df.corr() assumes all columns are numeric — on newer
# pandas, pass numeric_only=True if the CSV has non-numeric columns.
corrmat = df.corr()
corrfeat = corrmat.index

pl.figure(figsize=(20, 20))
a = sns.heatmap(df[corrfeat].corr(), annot=True, cmap="Blues")

Normalization ◀◀◀

import numpy as np
import pandas as pd

ds = pd.read_csv("FeatureSelection.csv")
ds.head()

# Simple feature scaling: divide each column by its maximum absolute
# value so every feature lies in [-1, 1].
for column in ds.columns:
    ds[column] = ds[column] / ds[column].abs().max()

ds.head()

import matplotlib.pyplot as pl

ds.plot(kind='bar')

# Min-Max method: x_new = (x_old - x_min) / (x_max - x_min)
# Rescales each column to the [0, 1] interval.
# Work on a copy so the simple-scaled frame `ds` is left intact.
ds1 = ds.copy()

for column in ds1.columns:
    ds1[column] = (ds1[column] - ds1[column].min()) / (ds1[column].max() - ds1[column].min())

ds1.head()

ds1.plot(kind='bar')

# Standardization (Z-score method): x_new = (x - mean) / std
# Centers each column at 0 with unit standard deviation.
# Work on a copy so the simple-scaled frame `ds` is left intact.
ds2 = ds.copy()

for column in ds2.columns:
    ds2[column] = (ds2[column] - ds2[column].mean()) / ds2[column].std()

ds2.head()

ds2.plot(kind='bar')