Artificial Intelligence and Machine Learning
ISBN 9788119221196


Chapter 10: Study of Boosting Algorithms

AdaBoost

AdaBoost (Adaptive Boosting) fits a sequence of weak learners, re-weighting the training samples after each round so that later learners concentrate on the examples that were misclassified earlier. The example below applies it to a dataset with two input features (age and estimated salary).
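For reference, one common formulation of the update rule (binary labels $y_i \in \{-1, +1\}$; $\varepsilon_m$ is the weighted error of weak learner $h_m$ at round $m$):

\alpha_m = \frac{1}{2} \ln \frac{1 - \varepsilon_m}{\varepsilon_m},
\qquad
w_i \leftarrow w_i \, e^{-\alpha_m y_i h_m(x_i)} \ \text{(then renormalize)},
\qquad
H(x) = \operatorname{sign}\Big( \sum_m \alpha_m h_m(x) \Big).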

import pandas as pd

ds = pd.read_csv(r'addsdataset.csv')

# X is for input, y for output

X = ds.iloc[:,[2,3]].values

y = ds.iloc[:, 4].values

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature scaling

from sklearn.preprocessing import StandardScaler

sd = StandardScaler()

X_train = sd.fit_transform(X_train)

X_test = sd.transform(X_test)

from sklearn.ensemble import AdaBoostClassifier

classifier = AdaBoostClassifier()

classifier.fit(X_train, y_train)


y_pred = classifier.predict(X_test)

y_test # actual labels

y_pred # predicted labels

from sklearn.metrics import confusion_matrix, accuracy_score

ac = accuracy_score(y_test, y_pred)

ac

0.82
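confusion_matrix was imported alongside accuracy_score, so it is worth printing as well; it breaks the 18% error down into false positives and false negatives (the exact counts depend on the split):

cm = confusion_matrix(y_test, y_pred)

cm # rows are actual classes, columns are predicted classes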

# Improvement: AdaBoost with a Random Forest base estimator

from sklearn.ensemble import RandomForestClassifier

RF = RandomForestClassifier(max_depth = 2, random_state = 0)

classifierNew = AdaBoostClassifier(estimator = RF, n_estimators = 100, learning_rate = 0.01, random_state = 0) # scikit-learn >= 1.2; older versions use base_estimator=RF

classifierNew.fit(X_train, y_train)

y_pred = classifierNew.predict(X_test)

ac = accuracy_score(y_test, y_pred)

ac

0.94
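The n_estimators and learning_rate values above were chosen by hand. A small grid search is the usual way to pick them; the parameter grid below is illustrative, not definitive:

from sklearn.model_selection import GridSearchCV

param_grid = {'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.1, 1.0]}

grid = GridSearchCV(AdaBoostClassifier(estimator = RF, random_state = 0), param_grid, cv = 5, scoring = 'accuracy')

grid.fit(X_train, y_train)

grid.best_params_ # best combination found on the training folds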

# Deployment

# In the Anaconda Prompt, type the command below

# conda install -c anaconda flask

# Download and install Postman

# Creating pkl files

import pickle

with open('model.pkl', 'wb') as file:

    pickle.dump(classifier, file)

with open('modelNew.pkl', 'wb') as file:

    pickle.dump(classifierNew, file)

import flask

from flask import Flask, request

import pickle

model_adaboost = pickle.load(open('modelNew.pkl', 'rb'))

app = Flask(__name__)

# GET method -> Read / Retrieve

@app.route('/', methods = ['GET', 'POST'])

def main():

    return "Ada boost with flask"

@app.route('/classify', methods = ['GET'])

def classify():

    if flask.request.method == 'GET':

        # Query parameters arrive as strings from Postman, so cast them to float
        Age = float(request.args.get('age'))

        EstimatedSalary = float(request.args.get('salary'))

        # Note: the model was trained on standardized features; in practice the
        # same StandardScaler should be applied to these raw values before predicting
        prediction = model_adaboost.predict([[Age, EstimatedSalary]])

        print(prediction)

        if prediction[0] == 1:

            return "there is a chance to purchase things"

        else:

            return "sorry, no chance"

    else:

        return "Select GET method"

if __name__ == '__main__':

    app.run()

Testing the service using Postman: send a GET request to http://127.0.0.1:5000/classify?age=35&salary=60000 (Flask serves on port 5000 by default) and check the response text.
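The same check can be scripted with the requests library instead of Postman; run this from a separate session while the Flask app is serving (the age and salary values are just examples):

import requests

resp = requests.get('http://127.0.0.1:5000/classify', params = {'age': 35, 'salary': 60000})

print(resp.text) # prints the string returned by the /classify route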

Stochastic Gradient Boosting

Note: scikit-learn's SGDClassifier used below is a linear classifier trained with stochastic gradient descent, not a boosting algorithm; a sketch of stochastic gradient boosting proper (GradientBoostingClassifier) appears at the end of this section.

import pandas as pd

ds = pd.read_csv('IrisNew.csv')

ds.head()

X = ds.iloc[:,1:5]

y = ds.iloc[:,5]

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

from sklearn.linear_model import SGDClassifier

sgd = SGDClassifier(loss = 'hinge', penalty = 'l2', max_iter = 1000, random_state = None, learning_rate = 'optimal')

sgd.fit(X_train, y_train)


y_pred = sgd.predict(X_test)

from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_test, y_pred) # use a new name; reusing sgd would overwrite the classifier

cm

ac_sgd = accuracy_score(y_test, y_pred)

ac_sgd

0.9333333333333333
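As noted at the start of this section, stochastic gradient boosting proper is available as GradientBoostingClassifier; a minimal sketch on the same split, where subsample < 1.0 is what makes the boosting stochastic (each tree is fit on a random fraction of the training rows):

from sklearn.ensemble import GradientBoostingClassifier

gbc = GradientBoostingClassifier(n_estimators = 100, subsample = 0.8, random_state = 0)

gbc.fit(X_train, y_train)

accuracy_score(y_test, gbc.predict(X_test))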

Voting Ensemble

A voting ensemble combines the predictions of several different classifiers trained on the same data. With soft voting, the models' predicted class probabilities are averaged and the class with the highest average wins; with hard voting, each model casts one vote for a class. The example below uses a binary classification problem (the diabetes dataset).
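Before building one with scikit-learn, a minimal sketch of what soft voting computes, using made-up probabilities from three hypothetical classifiers for two samples:

import numpy as np

p1 = np.array([[0.9, 0.1], [0.4, 0.6]]) # each row: [P(class 0), P(class 1)] for one sample

p2 = np.array([[0.6, 0.4], [0.3, 0.7]])

p3 = np.array([[0.8, 0.2], [0.6, 0.4]])

avg = (p1 + p2 + p3) / 3 # soft voting averages the class probabilities

avg.argmax(axis = 1) # predicted class per sample -> array([0, 1])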

import pandas as pd

import numpy as np

ds = pd.read_csv('diabetes.csv')

ds.head()

X = ds.iloc[:,0:8]

y = ds.iloc[:, 8]

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X = sc.fit_transform(X) # note: fitting the scaler on all rows before splitting leaks test information; stricter practice is to fit on X_train only

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Applying 5 different algorithms to a single dataset

# Applying Logistic Regression [82%]

from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()

logreg.fit(X_train, y_train)

y_pred_log = logreg.predict(X_test)

from sklearn.metrics import accuracy_score

ac = accuracy_score(y_test, y_pred_log)

ac

0.8246753246753247

# Applying Decision Tree [79%]

from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier()

dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)

from sklearn.metrics import accuracy_score

ac = accuracy_score(y_test, y_pred_dt)

ac

0.7922077922077922

# Applying KNN [80%]

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()

knn.fit(X_train, y_train)

y_pred_knn = knn.predict(X_test)

from sklearn.metrics import accuracy_score

ac = accuracy_score(y_test, y_pred_knn)

ac

0.8051948051948052

# Applying Random Forest [81%]

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=1000, random_state=0)

rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

from sklearn.metrics import accuracy_score

ac = accuracy_score(y_test, y_pred_rf)

ac

0.8181818181818182

# Applying Adaboost [74%]

from sklearn.ensemble import AdaBoostClassifier

adb = AdaBoostClassifier(n_estimators=1000, random_state=0)

adb.fit(X_train, y_train)

y_pred_adb = adb.predict(X_test)

from sklearn.metrics import accuracy_score

ac = accuracy_score(y_test, y_pred_adb)

ac

0.7467532467532467

# Voting

# Soft Voting without weight

from sklearn.ensemble import VotingClassifier

vc = VotingClassifier(estimators = [('LogisticReg', logreg),

                                    ('DecisionTree', dt),

                                    ('RandomForest', rf),

                                    ('AdaBoost', adb),

                                    ('Kneighbor', knn)], voting = 'soft')

vc.fit(X_train, y_train)

y_pred_vc = vc.predict(X_test)

from sklearn.metrics import accuracy_score

ac = accuracy_score(y_test, y_pred_vc)

ac

0.8311688311688312

# Soft voting with weights: each model's test accuracy is used as its voting weight; models are (name, model) pairs

def get_model():

    models = list()

    models.append(('lr', LogisticRegression()))

    models.append(('dt', DecisionTreeClassifier()))

    models.append(('knn', KNeighborsClassifier()))

    models.append(('rf', RandomForestClassifier()))

    models.append(('adb', AdaBoostClassifier()))

    return models

def evaluate_model(models, X_train, X_test, y_train, y_test):

    scores = list()

    for name, model in models:

        model.fit(X_train, y_train)

        yhat = model.predict(X_test)

        acc = accuracy_score(y_test, yhat)

        scores.append(acc)

    return scores

models = get_model() # build the list of (name, model) base estimators

scores = evaluate_model(models, X_train, X_test, y_train, y_test)

vc1 = VotingClassifier(estimators = models, voting = 'soft', weights = scores)

vc1.fit(X_train, y_train)

y_pred_vc1 = vc1.predict(X_test)

acc_sf = accuracy_score(y_test, y_pred_vc1)

acc_sf

0.8181818181818182

# Hard voting with weights

vc2 = VotingClassifier(estimators = models, voting = 'hard', weights = scores)

vc2.fit(X_train, y_train)

y_pred_vc2 = vc2.predict(X_test)

acc_hd = accuracy_score(y_test, y_pred_vc2)

acc_hd

0.8441558441558441