Artificial Intelligence and Machine Learning
ISBN 9788119221196

Highlights

Notes

  

Chapter 6: Study of Principal Component Analysis

# Chapter 6: Principal Component Analysis (PCA) on the Wine dataset.
#
# Pipeline: load data -> train/test split -> standardize -> PCA (2 components)
# -> logistic-regression classification -> confusion matrix -> 2-D scatter plot.

import numpy as np
import pandas as pd
import matplotlib.pyplot as pl

# Load the dataset; expects "wine.csv" in the current working directory.
# NOTE: straight ASCII quotes are required here -- curly quotes are a syntax error.
ds = pd.read_csv("wine.csv")
ds.head()

# Features: the first 13 columns; target label: the last column.
X = ds.iloc[:, 0:13]
X.head()

y = ds.iloc[:, -1]
y.head()

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# We will extract two columns as principal components (chosen by PCA via the
# eigenvalues of the covariance matrix). First standardize the data:
# PCA is sensitive to feature scale.
from sklearn.preprocessing import StandardScaler

st = StandardScaler()

# Fit the scaler on the training set only, then apply the SAME transform to
# the test set -- fitting on test data would leak information.
X_train = st.fit_transform(X_train)
X_test = st.transform(X_test)

# Applying PCA: keep the 2 components with the largest eigenvalues.
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Fraction of total variance explained by each kept component, sorted in
# descending order. Example output from one run:
#   array([0.36138769, 0.1937306])
pca.explained_variance_ratio_

# Now classification on the 2-D PCA features.
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()
logreg.fit(X_train, y_train)

y_test

y_predict = logreg.predict(X_test)
y_predict
# Example output:
#   array([1, 3, 2, 3, 1, 1, 2, 2, 2, 1, 1, 3, 2, 3, 2, 1, 3, 3, 1, 1, 3, 2,
#          1, 1, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 2], dtype=int64)

# Check with a confusion matrix: actual (rows) vs predicted (columns).
from sklearn.metrics import confusion_matrix

c = confusion_matrix(y_test, y_predict)
c
# Example output (3 categories):
#   array([[13,  1,  0],
#          [ 1, 11,  1],
#          [ 0,  0,  9]], dtype=int64)

# Visualize the 2 principal components (columns 0 and 1) of the training
# set, one scatter series per class label.
X_disp, y_disp = X_train, y_train

pl.scatter(X_disp[y_disp == 1, 0], X_disp[y_disp == 1, 1], label='one')
pl.scatter(X_disp[y_disp == 2, 0], X_disp[y_disp == 2, 1], label='two')
pl.scatter(X_disp[y_disp == 3, 0], X_disp[y_disp == 3, 1], label='three')
pl.legend()
pl.show()