import pandas as pd
from sklearn.datasets import load_iris
# Load the iris dataset bundled with scikit-learn.
iris = load_iris()
iris.feature_names

# Build a DataFrame with one column per measurement in iris.feature_names.
ds = pd.DataFrame(iris.data, columns=iris.feature_names)

# Append the numeric class label as the 'target' column.
ds['target'] = iris.target
ds.head()

# Class encoding: 0 -> setosa, 1 -> versicolor, 2 -> virginica
iris.target_names
# Notebook output: array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

# Filter the rows of a single flower type (label 2) to inspect that class;
# len() of the result gives its row count.
ds[ds.target == 2]
# (notebook output omitted)

# Add a human-readable flower-name column by looking each class label
# (0, 1, 2) up in iris.target_names.
ds['fname'] = ds.target.apply(lambda x: iris.target_names[x])
ds.head()
from matplotlib import pyplot as pl

# Create one DataFrame per flower class (labels 0, 1 and 2).
ds1 = ds[ds.target == 0]
ds2 = ds[ds.target == 1]
ds3 = ds[ds.target == 2]

# Sepal length vs. width: class 0 in red, class 1 in blue.
# Each plot gets its own figure; otherwise both scatter sets land on the
# same axes and the second xlabel overwrites the first.
pl.figure()
pl.scatter(ds1['sepal length (cm)'], ds1['sepal width (cm)'],
           color='red', marker='o')
pl.scatter(ds2['sepal length (cm)'], ds2['sepal width (cm)'],
           color='blue', marker='o')
pl.xlabel('Sepal Length')
pl.ylabel('Sepal Width')

# Petal length vs. width for the same two classes, same colours.
pl.figure()
pl.scatter(ds1['petal length (cm)'], ds1['petal width (cm)'],
           color='red', marker='o')
pl.scatter(ds2['petal length (cm)'], ds2['petal width (cm)'],
           color='blue', marker='o')
pl.xlabel('Petal Length')
pl.ylabel('Petal Width')
from sklearn.model_selection import train_test_split

# Features: every measurement column (both label columns are dropped).
X = ds.drop(['target', 'fname'], axis='columns')
X.head()

# Labels: the numeric class id.
y = ds['target']
y.head()

# Hold out 20% of the rows for testing.
# The iris rows are ordered by class, so an unshuffled split would put only
# class 2 in the test tail and leave that class under-represented in training.
# Shuffle with a fixed seed (still reproducible) and stratify on y so every
# class appears in both parts in the same proportion.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)

from sklearn.svm import SVC

# Support-vector classifier with a linear kernel.
svmodel = SVC(kernel='linear')
svmodel.fit(X_train, y_train)

# Accuracy on the training rows (optimistic: the model was fitted on them).
svmodel.score(X_train, y_train)

# Accuracy on the held-out rows, i.e. data the model has not seen.
svmodel.score(X_test, y_test)