import numpy as np # Linear Algebra
import pandas as pd # Data Frames
import matplotlib.pyplot as plt # Visualization
from mpl_toolkits.mplot3d import axes3d # 3D Visualization
import ipywidgets as widgets # Interactivity
from IPython.display import display # Display Widgets
import time # To Track Time
%matplotlib notebook
def sigmoid(x, grad=False):
    """Element-wise logistic sigmoid, or its derivative.

    Parameters
    ----------
    x : float or ndarray
        Input value(s).
    grad : bool, optional
        When True, return the derivative sigmoid(x) * (1 - sigmoid(x))
        instead of the activation.

    Returns
    -------
    float or ndarray
        sigmoid(x), or its derivative when ``grad=True``.
    """
    # Compute the activation once and reuse it; the original grad branch
    # recursed into sigmoid() twice, evaluating np.exp three times.
    s = 1 / (1 + np.exp(-x))
    if grad:
        return s * (1 - s)
    return s
epsilon = 1e-10  # small constant keeping log() away from log(0) in the loss
import sklearn.datasets as datasets
# Load the sklearn breast-cancer dataset (a Bunch: data, target, names, ...).
cancer = datasets.load_breast_cancer()
cancer.keys()  # notebook cell: display the available Bunch keys
print("Feature Names:\n", cancer['feature_names'], "\n\nLabel Names:\n", cancer['target_names'])
cancer['data'].shape, cancer['target'].shape  # notebook cell: display shapes
# DataFrame view of the features, for inspection only (training uses the array).
df = pd.DataFrame(cancer['data'], columns=cancer['feature_names'])
df.head()  # notebook cell: display the first rows
# Positional train/validation split: first 480 rows train, remainder validate.
# NOTE(review): rows are taken in dataset order (no shuffle) and features are
# not scaled -- confirm this is intentional before trusting the metrics.
X = cancer['data'][:480]
Y = cancer['target'][:480]
X_val = cancer['data'][480:]
Y_val = cancer['target'][480:]
def model(theta, X):
    """Forward pass of logistic regression: sigmoid of the linear score."""
    # `@` is the matrix-product operator, equivalent to np.dot here.
    return sigmoid(theta @ X)
def train(x, y, learning_rate=3e-6, iterations=1, first=False):
    """Run `iterations` steps of batch gradient descent on the global `theta`.

    Parameters
    ----------
    x : ndarray
        Transposed design matrix, shape (n_features, n_samples).
    y : ndarray
        Binary labels (0/1), shape (n_samples,).
    learning_rate : float
        Step size for each gradient update.
    iterations : int
        Number of full-batch updates to perform in this call.
    first : bool
        Unused; kept for call compatibility.

    Returns
    -------
    tuple
        (cost, dcost_dtheta, acc) from the LAST iteration. Note that `pred`
        (and hence cost/accuracy) is computed with the theta value from
        BEFORE that iteration's update.
    """
    global theta, prev_theta
    prev_theta = theta  # snapshot of theta before this call (not read here)
    # Prepend a row of ones so theta[0] acts as the bias/intercept term.
    X = np.vstack([np.ones(y.shape[0]), x])
    for _ in range(iterations):
        # Model
        pred = model(theta, X)
        # Calculations for Backpropagation
        # Mean binary cross-entropy; epsilon guards against log(0).
        error = np.mean((y * np.log(pred + epsilon)) + ((1-y) * np.log(1-pred + epsilon)), -1)
        cost = - error
        # Gradient of the SUMMED (not batch-averaged) cross-entropy, which is
        # why the caller compensates with very small learning rates.
        dcost_dtheta = np.dot(X, pred-y)
        theta = theta - (dcost_dtheta * learning_rate)
    class_pred = np.round(pred)  # threshold predicted probabilities at 0.5
    acc = np.sum(class_pred == y)/len(y)
    return cost, dcost_dtheta, acc
# Random initialisation: one weight per feature plus a bias term.
theta = np.random.randn(X.shape[1]+1)
print(theta.shape)
epochs = 20
run_start = time.time()
epoch_start = time.time()
for i in range(1, epochs+1):
    # Decay the step size after epoch 15.
    if i <= 15:
        lr = 4e-8
    else:
        lr = 2e-8
    cost, dcost_dtheta, acc = train(X.T, Y, learning_rate=lr, iterations=20000)
    print('Epoch {} - Cost: {:.3f} | Accuracy: {:.2f}%\nTime: {:.2f}s\n'.format(i, cost, acc*100, time.time()-epoch_start))
    epoch_start = time.time()
print('Total Time Taken: {:.2f}s'.format(time.time()-run_start))
# Evaluate the hand-trained model on the training split.
Xs = np.vstack([np.ones(Y.shape[0]), X.T])  # prepend the bias row
modelpred = model(theta, Xs)
train_ll = (Y * np.log(modelpred + epsilon)) + ((1 - Y) * np.log(1 - modelpred + epsilon))
print(- np.mean(train_ll))  # Cross Entropy Loss
print((np.round(modelpred) == Y).sum() / Y.shape[0])  # Accuracy
# Same two metrics on the held-out validation split.
Xs = np.vstack([np.ones(Y_val.shape[0]), X_val.T])
modelpred = model(theta, Xs)
val_ll = (Y_val * np.log(modelpred + epsilon)) + ((1 - Y_val) * np.log(1 - modelpred + epsilon))
print(- np.mean(val_ll))  # Cross Entropy Loss
print((np.round(modelpred) == Y_val).sum() / Y_val.shape[0])  # Accuracy
from sklearn.linear_model import LogisticRegression
# Baseline: sklearn logistic regression on the same split.
# FIX: the original bound this estimator to the name `model`, clobbering the
# model() function defined earlier in the notebook. Every use of that binding
# before the bagging loop rebinds `model` is inside this cell group, so a
# distinct name is safe and restores the function.
log_reg = LogisticRegression(solver='liblinear')
log_reg.fit(X, Y)
log_reg.score(X, Y)  # notebook cell: training accuracy
log_reg.score(X_val, Y_val)  # notebook cell: validation accuracy
skpred = log_reg.predict(X)
# NOTE(review): predict() yields hard 0/1 labels, so each misclassified sample
# contributes -log(epsilon) here; predict_proba() would give the true
# cross-entropy. Kept as-is to preserve the notebook's reported numbers.
- np.mean((Y * np.log(skpred + epsilon)) + ((1-Y) * np.log(1-skpred + epsilon))) # Cross Entropy Loss
skpred = log_reg.predict(X_val)
- np.mean((Y_val * np.log(skpred + epsilon)) + ((1-Y_val) * np.log(1-skpred + epsilon))) # Cross Entropy Loss
num_bags = 250
bag_size = 250
# Bootstrap: draw `num_bags` resamples of the training rows, with replacement
# (np.random.choice samples with replacement by default).
bags = []
for i in range(num_bags):
    idx = np.random.choice(np.arange(X.shape[0]), bag_size)
    bags.append([X[idx], Y[idx]])
# Fit one liblinear logistic regression per bootstrap sample.
models = []
for bag in bags:
    clf = LogisticRegression(solver='liblinear')
    clf.fit(bag[0], bag[1])
    models.append(clf)
# Average the bagged models' hard 0/1 predictions on the training split.
skpreds = []
for model in models:
    skpreds.append(model.predict(X))
avg_preds = np.stack(skpreds).mean(axis=0)
train_loss = - np.mean((Y * np.log(avg_preds + epsilon)) + ((1-Y) * np.log(1-avg_preds + epsilon)))
print(train_loss) # Cross Entropy Loss
print((np.round(avg_preds) == Y).sum() / Y.shape[0])
# Same ensemble average on the validation split.
skpreds = []
for model in models:
    skpreds.append(model.predict(X_val))
avg_preds = np.stack(skpreds).mean(axis=0)
val_loss = - np.mean((Y_val * np.log(avg_preds + epsilon)) + ((1-Y_val) * np.log(1-avg_preds + epsilon)))
print(val_loss) # Cross Entropy Loss
print((np.round(avg_preds) == Y_val).sum() / Y_val.shape[0])
# A few more sklearn baselines, each fit on the raw (unscaled) training split.
# Estimator.fit() returns the estimator itself, so construction and fitting
# chain into a single expression.
from sklearn.naive_bayes import GaussianNB
NB = GaussianNB().fit(X, Y)
NB.score(X, Y)  # notebook cell: training accuracy
from sklearn.tree import DecisionTreeClassifier
dec_tree = DecisionTreeClassifier().fit(X, Y)
dec_tree.score(X, Y)
from sklearn.svm import SVC
SVM1 = SVC(kernel='linear').fit(X, Y)
SVM2 = SVC().fit(X, Y)  # default RBF kernel
SVM1.score(X, Y), SVM2.score(X, Y)
from sklearn.ensemble import RandomForestClassifier
RFC = RandomForestClassifier().fit(X, Y)
RFC.score(X, Y)