# Find an appropriate number of clusters (K) for K-means
# using the elbow method
import pandas as pd
import numpy as np
# Load the Iris measurements from the CSV file in the working directory.
ds = pd.read_csv("Iris.csv")
# Preview the first rows (only rendered when run interactively).
ds.head()
from matplotlib import pyplot as pl
# Scatter the two petal features against each other before any clustering.
pl.scatter(ds["PetalLength"], ds["PetalWidth"])
# Show this figure on its own so later scatter calls do not draw on top of it.
pl.show()
from sklearn.cluster import KMeans
# Partition the samples into three clusters using the two petal measurements.
kmean = KMeans(n_clusters=3)
y_predict = kmean.fit_predict(ds[["PetalLength", "PetalWidth"]])
y_predict
# Store each row's cluster index next to its measurements.
ds["cluster"] = y_predict
ds
kmean.cluster_centers_
# One sub-frame per cluster index so each can be drawn in its own colour.
ds1 = ds[ds.cluster == 0]
ds2 = ds[ds.cluster == 1]
ds3 = ds[ds.cluster == 2]
pl.scatter(ds1.PetalLength, ds1.PetalWidth, color="blue", label="cluster 0")
pl.scatter(ds2.PetalLength, ds2.PetalWidth, color="red", label="cluster 1")
pl.scatter(ds3.PetalLength, ds3.PetalWidth, color="green", label="cluster 2")
# Column 0 / column 1 of the centres follow the feature order passed to
# fit_predict above (PetalLength, PetalWidth).
pl.scatter(
    kmean.cluster_centers_[:, 0],
    kmean.cluster_centers_[:, 1],
    color="black",
    marker="D",
    label="centroid",
)
pl.xlabel("Petal Length")
pl.ylabel("Petal Width")
# The label= arguments above give legend() entries to display.
pl.legend()
pl.show()
# The x and y values are on mismatched scales,
# so we need to rescale both features
from sklearn.preprocessing import MinMaxScaler
# Rescale both petal features in place with MinMaxScaler.
scl = MinMaxScaler()
# The scaler is refit for each column in turn, so after the loop `scl`
# holds the parameters learned from PetalWidth (the last column fitted).
for column in ("PetalLength", "PetalWidth"):
    ds[column] = scl.fit_transform(ds[[column]])
# Apply K-means once again to the same two petal columns.
kmean = KMeans(n_clusters=3)
y_predict = kmean.fit_predict(ds[["PetalLength", "PetalWidth"]])
y_predict
# Overwrite the cluster column with the assignments from this fit.
ds["cluster"] = y_predict
ds
# One sub-frame per cluster index so each can be drawn in its own colour.
ds1 = ds[ds.cluster == 0]
ds2 = ds[ds.cluster == 1]
ds3 = ds[ds.cluster == 2]
pl.scatter(ds1.PetalLength, ds1.PetalWidth, color="blue")
pl.scatter(ds2.PetalLength, ds2.PetalWidth, color="red")
pl.scatter(ds3.PetalLength, ds3.PetalWidth, color="green")
pl.xlabel("Petal Length")
pl.ylabel("Petal Width")
# Show this figure now so the elbow plot further down does not land on
# the same axes.
pl.show()
# Elbow method: fit K-means for k = 1..9 on the two petal columns and
# record each fitted model's inertia_ value.
k_range = range(1, 10)
sse = []
for k in k_range:
    kmean = KMeans(n_clusters=k)
    kmean.fit(ds[["PetalLength", "PetalWidth"]])
    sse.append(kmean.inertia_)
sse
# The values below appear to be output pasted from an earlier run
# (one entry per k); kept for reference only:
# [28.391514358368717,
#  5.179687509974783,
#  1.7050986081225123,
#  1.1621031930971286,
#  0.8570856553216398,
#  0.6833274904190353,
#  0.5683512655008139,
#  0.48911635449076774,
#  0.4155388630360096]
# Draw the curve on a fresh figure rather than on whatever was plotted last.
pl.figure()
pl.xlabel("K")
pl.ylabel("Sum of squared errors (inertia)")
pl.plot(k_range, sse)
pl.show()