Artificial Intelligence and Machine Learning
ISBN 9788119221196

Highlights

Notes

  

Chapter 7: Implementation of K-Means Clustering

# Find the appropriate number of clusters (K)

# using the elbow method

# Load the Iris measurements, cluster the two petal features into three
# groups with K-Means, and plot the clusters together with their centroids.
import numpy as np
import pandas as pd
from matplotlib import pyplot as pl
from sklearn.cluster import KMeans

# NOTE(review): assumes Iris.csv is in the working directory and has
# "PetalLength" and "PetalWidth" columns -- confirm against the data file.
ds = pd.read_csv("Iris.csv")

ds.head()  # notebook display of the first rows

# Raw, unclustered petal measurements. Shown on their own figure so the
# cluster plot further down does not draw over these points.
pl.scatter(ds['PetalLength'], ds['PetalWidth'])
pl.show()

# random_state pins the centroid initialisation so the cluster ids (and the
# colours used below) are reproducible; n_init is spelled out so the result
# does not depend on the installed scikit-learn version's default.
kmean = KMeans(n_clusters=3, n_init=10, random_state=0)

kmean  # notebook display of the estimator

# Fit on the two petal columns and get one cluster id (0, 1 or 2) per row.
y_predict = kmean.fit_predict(ds[['PetalLength', 'PetalWidth']])

y_predict  # notebook display

ds['cluster'] = y_predict

ds  # notebook display

kmean.cluster_centers_  # notebook display: one (length, width) row per cluster

# One sub-frame per cluster id.
ds1 = ds[ds.cluster == 0]
ds2 = ds[ds.cluster == 1]
ds3 = ds[ds.cluster == 2]

# Every artist gets a label so that pl.legend() has entries to show.
pl.scatter(ds1.PetalLength, ds1.PetalWidth, color='blue', label='Cluster 0')
pl.scatter(ds2.PetalLength, ds2.PetalWidth, color='red', label='Cluster 1')
pl.scatter(ds3.PetalLength, ds3.PetalWidth, color='green', label='Cluster 2')
pl.scatter(
    kmean.cluster_centers_[:, 0],
    kmean.cluster_centers_[:, 1],
    color='black',
    marker='D',
    label='Centroids',
)

pl.xlabel('Petal Length')
pl.ylabel('Petal Width')
pl.legend()
pl.show()

# The two petal features are on different scales, so rescale each of them
# with min-max scaling before clustering again.
from sklearn.preprocessing import MinMaxScaler

scl = MinMaxScaler()

# The same scaler is refit for each column in turn, so after these two lines
# `scl` holds the parameters learned from PetalWidth. The scaled values
# overwrite the original columns in `ds`.
ds['PetalLength'] = scl.fit_transform(ds[['PetalLength']])
ds['PetalWidth'] = scl.fit_transform(ds[['PetalWidth']])

# Apply K-Means once again to the petal columns and plot the new clusters.
# random_state makes the cluster ids (and therefore the colours) reproducible;
# n_init is spelled out so the result does not depend on the installed
# scikit-learn version's default.
kmean = KMeans(n_clusters=3, n_init=10, random_state=0)

y_predict = kmean.fit_predict(ds[['PetalLength', 'PetalWidth']])

y_predict  # notebook display

# Replace the earlier cluster assignment with the new one.
ds['cluster'] = y_predict

ds  # notebook display

# One sub-frame per cluster id.
ds1 = ds[ds.cluster == 0]
ds2 = ds[ds.cluster == 1]
ds3 = ds[ds.cluster == 2]

pl.scatter(ds1.PetalLength, ds1.PetalWidth, color='blue')
pl.scatter(ds2.PetalLength, ds2.PetalWidth, color='red')
pl.scatter(ds3.PetalLength, ds3.PetalWidth, color='green')

pl.xlabel('Petal Length')
pl.ylabel('Petal Width')
pl.show()

# Elbow method: fit K-Means for K = 1..9 and record the sum of squared
# errors (the model's inertia_) for each K.
k_range = range(1, 10)

sse = []

for k in k_range:
    # A fixed random_state keeps the curve reproducible between runs.
    # Note that `kmean` is rebound on every pass, so after the loop it
    # refers to the model fitted with the largest K.
    kmean = KMeans(n_clusters=k, n_init=10, random_state=0)
    kmean.fit(ds[['PetalLength', 'PetalWidth']])
    sse.append(kmean.inertia_)

sse  # notebook display

# Output recorded from the original (unseeded) run; values for the larger K
# may differ slightly from run to run:
# [28.391514358368717,
#  5.179687509974783,
#  1.7050986081225123,
#  1.1621031930971286,
#  0.8570856553216398,
#  0.6833274904190353,
#  0.5683512655008139,
#  0.48911635449076774,
#  0.4155388630360096]

# Plot SSE against K; the bend ("elbow") in the curve suggests the K to use.
pl.xlabel('K')
pl.ylabel('SSE')
pl.plot(k_range, sse)