import pandas as pd

# Load the social-network ads dataset: columns 2-3 (Age, EstimatedSalary)
# are the inputs, column 4 is the binary "purchased" target.
# Fix: the original used typographic quotes (r’…’), a syntax error.
ds = pd.read_csv(r'addsdataset.csv')
X = ds.iloc[:, [2, 3]].values
y = ds.iloc[:, 4].values

from sklearn.model_selection import train_test_split
# NOTE(review): shuffle=False keeps rows in file order; confirm the file is
# not sorted by the target, otherwise this split is biased.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0, shuffle=False)

# Feature scaling: fit on the training split only, then apply the same
# transform to the test split (no test-set leakage).
from sklearn.preprocessing import StandardScaler
sd = StandardScaler()
X_train = sd.fit_transform(X_train)
X_test = sd.transform(X_test)

# Baseline AdaBoost classifier with default hyperparameters.
from sklearn.ensemble import AdaBoostClassifier
classifier = AdaBoostClassifier()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

from sklearn.metrics import confusion_matrix, accuracy_score
ac = accuracy_score(y_test, y_pred)
# Observed accuracy: ~0.82 (the bare REPL echo lines were removed).
# Improvement: use a depth-limited Random Forest as the AdaBoost base
# estimator instead of the default decision stump.
from sklearn.ensemble import RandomForestClassifier

RF = RandomForestClassifier(max_depth=2, random_state=0)
# NOTE(review): `base_estimator` was renamed to `estimator` in
# scikit-learn 1.2 and removed in 1.4 — update the keyword if running a
# newer scikit-learn.
classifierNew = AdaBoostClassifier(base_estimator=RF, n_estimators=100,
                                   learning_rate=0.01, random_state=0)
classifierNew.fit(X_train, y_train)
y_pred = classifierNew.predict(X_test)
ac = accuracy_score(y_test, y_pred)
# Observed accuracy: ~0.94
# --- Deployment ---
# In the Anaconda prompt run:  conda install -c anaconda flask
# Install Postman (from Google) to exercise the API.
# Fix: pickle was originally imported AFTER the pickle.dump calls that
# use it; import everything up front.
import pickle

import flask
from flask import Flask, request

# Serialize both trained models to disk.
with open('model.pkl', 'wb') as file:
    pickle.dump(classifier, file)
with open('modelNew.pkl', 'wb') as file:
    pickle.dump(classifierNew, file)

# Reload the improved model for serving. Fix: use a context manager so the
# file handle is closed (the original leaked `open(...)`).
with open('modelNew.pkl', 'rb') as file:
    model_adaboost = pickle.load(file)
# GET method -> Read / Retrieve
# Fix: the Flask application object was never created in the original
# code; it is required before any @app.route decorator can be used.
app = Flask(__name__)


@app.route('/', methods=['GET', 'POST'])
def main():
    """Health-check route confirming the service is up."""
    return "Ada boost with flask"
@app.route('/classify', methods=['GET'])
def classify():
    """Classify an (age, salary) pair passed as GET query parameters.

    Called from Postman, e.g. /classify?age=30&salary=87000.
    Returns a human-readable purchase prediction string.
    """
    if flask.request.method == 'GET':
        # Fix: query-string values arrive as strings; cast to float before
        # feeding the model, otherwise predict() receives string features.
        Age = float(request.args.get('age'))
        EstimatedSalary = float(request.args.get('salary'))
        # NOTE(review): the model was trained on StandardScaler-scaled
        # features, but raw values are passed here — the fitted scaler
        # should probably be applied to the inputs too; confirm and fix.
        prediction = model_adaboost.predict([[Age, EstimatedSalary]])
        print(prediction)
        if prediction == 1:
            return "there is a chance to purchase things"
        else:
            return "sorry, no chance"
    else:
        return "Select GET method"
# Start the development server only when run as a script (fix: the
# original used typographic quotes, a syntax error).
if __name__ == '__main__':
    app.run()
# --- SGD classifier on the Iris dataset ---
# (pandas is already imported at the top of the file)
ds = pd.read_csv("IrisNew.csv")
ds.head()
X = ds.iloc[:, 1:5]
y = ds.iloc[:, 5]

from sklearn.model_selection import train_test_split
# No random_state here, so results vary between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

from sklearn.linear_model import SGDClassifier
sgd = SGDClassifier(loss='hinge', penalty='l2', max_iter=1000,
                    random_state=None, learning_rate='optimal')
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)

from sklearn.metrics import confusion_matrix, accuracy_score
# Fix: the original rebound `sgd` to the confusion matrix, clobbering the
# fitted classifier; use a separate name.
cm = confusion_matrix(y_test, y_pred)
ac_sgd = accuracy_score(y_test, y_pred)
# Observed accuracy: ~0.93 (varies — the split is not seeded)
# Voting ensemble section: it combines all the algorithms trained above.
# Soft voting averages predicted class probabilities; this is a binary
# classification problem.
# --- Ensemble voting demo on the diabetes dataset ---
import pandas as pd
import numpy as np

# Fix: typographic quotes and a stray trailing quote after ds.head()
# made the original a syntax error.
ds = pd.read_csv('diabetes.csv')
ds.head()
X = ds.iloc[:, 0:8]
y = ds.iloc[:, 8]

# NOTE(review): the scaler is fit on the FULL dataset before the split,
# which leaks test-set statistics into training; consider fitting on
# X_train only.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# Applying 5 different algorithms on a single dataset.
# 1) Logistic Regression [~82%]
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred_log = logreg.predict(X_test)

from sklearn.metrics import accuracy_score
ac = accuracy_score(y_test, y_pred_log)
# Observed accuracy: ~0.82
# 2) Decision Tree [~79%]
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
ac = accuracy_score(y_test, y_pred_dt)
# Observed accuracy: ~0.79 (bare REPL echo lines removed)
# 3) K-Nearest Neighbors [~80%]
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
ac = accuracy_score(y_test, y_pred_knn)
# Observed accuracy: ~0.81 (bare REPL echo lines removed)
# 4) Random Forest [~81%]
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=1000, random_state=0)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
ac = accuracy_score(y_test, y_pred_rf)
# Observed accuracy: ~0.82 (bare REPL echo lines removed)
# 5) AdaBoost [~74%]
from sklearn.ensemble import AdaBoostClassifier
adb = AdaBoostClassifier(n_estimators=1000, random_state=0)
adb.fit(X_train, y_train)
y_pred_adb = adb.predict(X_test)
ac = accuracy_score(y_test, y_pred_adb)
# Observed accuracy: ~0.75 (bare REPL echo lines removed)
# --- Voting ---
# Soft voting without weights: averages the per-class predicted
# probabilities of the five fitted estimators.
from sklearn.ensemble import VotingClassifier

vc = VotingClassifier(estimators=[('LogisticReg', logreg),
                                  ('DecisionTree', dt),
                                  ('RandomForest', rf),
                                  ('AdaBoost', adb),
                                  ('Kneighbor', knn)],
                      voting='soft')
# Bug fix: the ensemble must be fitted before predicting — the original
# called predict() on an unfitted VotingClassifier, which raises
# NotFittedError.
vc.fit(X_train, y_train)
y_pred_vc = vc.predict(X_test)
ac = accuracy_score(y_test, y_pred_vc)
# Observed accuracy: ~0.83
# Soft voting with weights: each estimator's vote is weighted by its score.
def get_model():
    """Return the five fresh (name, estimator) pairs used for weighted voting.

    Returns:
        list[tuple[str, object]]: unfitted scikit-learn classifiers keyed
        by a short name, in a fixed order.
    """
    # Fix: the original used typographic quotes for the names, a syntax
    # error in Python.
    models = [
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier()),
        ('knn', KNeighborsClassifier()),
        ('rf', RandomForestClassifier()),
        ('adb', AdaBoostClassifier()),
    ]
    return models
def evaluate_model(models, X_train, X_test, y_train, y_test):
    """Fit each (name, model) pair on the training split and collect its
    test-set accuracy.

    Returns:
        list[float]: accuracies in the same order as *models*.
    """
    accuracies = []
    for _name, estimator in models:
        estimator.fit(X_train, y_train)
        predictions = estimator.predict(X_test)
        accuracies.append(accuracy_score(y_test, predictions))
    return accuracies
# Weighted soft voting: weight each model's probability vote by its own
# held-out accuracy.
models = get_model()  # create fresh, unfitted base models
scores = evaluate_model(models, X_train, X_test, y_train, y_test)
vc1 = VotingClassifier(estimators=models, voting='soft', weights=scores)
vc1.fit(X_train, y_train)
y_pred_vc1 = vc1.predict(X_test)
# Fix: the original echoed an accuracy value but never computed it.
acc_soft = accuracy_score(y_test, y_pred_vc1)
# Observed accuracy: ~0.82
# Weighted hard voting: weighted majority vote on the predicted labels.
vc2 = VotingClassifier(estimators=models, voting='hard', weights=scores)
vc2.fit(X_train, y_train)
y_pred_vc2 = vc2.predict(X_test)
acc_hd = accuracy_score(y_test, y_pred_vc2)
# Observed accuracy: ~0.84