import numpy as np # Linear Algebra
import pandas as pd # Data Frames
import matplotlib.pyplot as plt # Visualization
import matplotlib.cm as cm # Color Mapping
from mpl_toolkits.mplot3d import axes3d # 3D Visualization
import ipywidgets as widgets # Interactivity
from IPython.display import display # Display Widgets
%matplotlib notebook
def sigmoid(x, grad=False):
if grad:
return sigmoid(x) * (1-sigmoid(x))
return 1/(1+np.exp(-x))
print('Sigmoid 6.5: {:.4f}'.format(sigmoid(6.5)))
print('Sigmoid 1.2: {:.4f}'.format(sigmoid(1.2)))
print('Sigmoid -8: {:.4f}'.format(sigmoid(-8)))
print('Sigmoid -4.5: {:.4f}'.format(sigmoid(-4.5)))
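As a quick, illustrative sanity check (not part of the original derivation), the grad=True branch can be compared against a central finite difference:
h = 1e-5
for z in [6.5, 1.2, -8, -4.5]:
    numeric = (sigmoid(z + h) - sigmoid(z - h)) / (2 * h)
    # Analytic gradient: sigmoid(z) * (1 - sigmoid(z))
    print('z={:5.1f}  analytic={:.6f}  numeric={:.6f}'.format(z, sigmoid(z, grad=True), numeric))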
x = np.array([1., 1.9, 1.2, 4., 3.2, 2.4, 3.6, 4.6])
x = x - x.mean()
y = np.array([0, 0, 0, 1, 0, 1, 1, 1])
pd.DataFrame({'x':x,'y':y})
fig = plt.figure(figsize=(4,3))
plt.scatter(x,y)
plt.show()
2) Model with 1 Parameter (Gradient Only)
def model(w, x):
return sigmoid(w * x)
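Note that with only a gradient the curve is pinned through (0, 0.5): model(w, 0) = sigmoid(0) = 0.5 for any w, which is why the data were centered above. A minimal check:
# The 0.5 crossing stays at x = 0, regardless of the weight
print(model(0.5, 0.0), model(5.0, 0.0))  # both print 0.5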
def plot_model(title, init_weight=None):
line_x = np.arange(-3, 8)
    if init_weight is None:
        init_weight = np.random.randn()
fig = plt.figure(figsize=(7,4))
ax = fig.add_subplot(1,1,1)
plt.suptitle(title, fontsize=15)
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
# plt.axis([-4, 9, -1, 2])
ax.axhline(0, color='black')
ax.axvline(0, color='black')
ax.axhline(0.5, color='black', linewidth=0.5)
ax.scatter(x, y, c='red')
line, = ax.plot(line_x, model(init_weight, line_x), c='green')
def update(weight=init_weight):
line.set_ydata(model(weight, line_x))
fig.canvas.draw()
gradient = widgets.FloatText(value=init_weight, description='Gradient', step=0.1)
display(gradient)
widgets.interactive(update, weight=gradient)
plot_model('Model with 1 Variable')
size = 50
w_sim = np.linspace(-10, 15, size)
w_matrix = w_sim.reshape(size, 1)
x_matrix = x.reshape(1, x.shape[0])
y_matrix = y.reshape(1, y.shape[0])
# Mean Squared Error as Cost Function
pred_sim = sigmoid(w_matrix * x_matrix)  # broadcast to (Sw, N), sigmoid applied elementwise
error_sim = (pred_sim - y_matrix)
cost_sim = np.sum(error_sim**2, 1)/len(y)
# Cross Entropy Loss as Cost Function
epsilon = 1e-8 # To Prevent -inf in log(pred)
cel_error = ((y_matrix * np.log(pred_sim + epsilon)) + ((1-y_matrix) * np.log(1-pred_sim + epsilon)))
cel_cost = -np.sum(cel_error, axis=-1)/len(y)
print('W Sim:\n', w_sim, '\n\nMean Squared Error Sim:\n', cost_sim, '\n\nCross Entropy Loss Sim:\n', cel_cost)
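For reference, with p_i = sigmoid(w * x_i) the two costs computed above are MSE = (1/N) * sum((p_i - y_i)^2) and CE = -(1/N) * sum(y_i*log(p_i) + (1 - y_i)*log(1 - p_i)). A minimal sketch checking the vectorized MSE against an explicit loop (the loop is illustrative only):
mse_loop = sum((sigmoid(w_sim[0] * xi) - yi)**2 for xi, yi in zip(x, y)) / len(y)
print('vectorized: {:.6f}  loop: {:.6f}'.format(cost_sim[0], mse_loop))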
fig = plt.figure(figsize=(7,4))
ax = fig.add_subplot(111)
plt.title('Mean Squared Error Cost Curve', fontsize=20)
ax.set_xlabel('w', fontsize=15)
ax.set_ylabel('Cost', fontsize=15)
ax.plot(w_sim, cost_sim)
plt.show()
fig = plt.figure(figsize=(7,4))
ax = fig.add_subplot(111)
plt.title('Cross Entropy Loss Cost Curve', fontsize=20)
ax.set_xlabel('w', fontsize=15)
ax.set_ylabel('Cost', fontsize=15)
ax.plot(w_sim, cel_cost)
plt.show()
minimum_cost_index = np.argmin(cost_sim)
best_w = w_sim[minimum_cost_index]
print('Minimum Index:', minimum_cost_index, \
'\nCost:', cost_sim[minimum_cost_index], \
'\nGradient:', w_sim[minimum_cost_index])
plot_model('Best Fit based on Simulation', init_weight=best_w)
3) Model with 2 Parameters (Gradient and Y-Intercept)
def model(b, w, x):
return sigmoid(b + w * x)
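The intercept frees the 0.5 crossing from x = 0: the model equals 0.5 wherever b + w*x = 0, i.e. at x = -b/w. A quick check with illustrative values:
print(model(2.0, 4.0, -0.5))  # sigmoid(2 + 4*(-0.5)) = sigmoid(0) = 0.5, at x = -b/w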
def plot_model(title, init_weight=None, init_bias=None):
line_x = np.arange(-3, 8)
    if init_weight is None:
        init_weight = np.random.randn()
    if init_bias is None:
        init_bias = np.random.randn()
fig = plt.figure(figsize=(7,4))
ax = fig.add_subplot(1,1,1)
plt.suptitle(title, fontsize=15)
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
# plt.axis([-2, 7, -1, 2])
ax.axhline(0, color='black')
ax.axvline(0, color='black')
ax.axhline(0.5, color='black', linewidth=0.5)
ax.scatter(x, y, c='red')
line, = ax.plot(line_x, model(init_bias, init_weight, line_x), c='green')
def update(weight=init_weight, bias=init_bias):
line.set_ydata(model(bias, weight, line_x))
fig.canvas.draw()
gradient = widgets.FloatText(value=init_weight, description='Gradient')
bias = widgets.FloatText(value=init_bias, description='Y-Intercept')
display(gradient)
display(bias)
widgets.interactive(update, weight=gradient, bias=bias)
plot_model('Model with 2 Variables')
3.1 Simulation for Visualization
size = 100
w_sim = np.linspace(-10, 20, size)
b_sim = np.linspace(-50, 50, size)
W, B = np.meshgrid(w_sim, b_sim)
w_matrix = w_sim.reshape(size, 1)
b_matrix = b_sim.reshape(1, size)
x_matrix = x.reshape(1, x.shape[0])
y_matrix = y.reshape(1, y.shape[0])
plot_w = np.repeat(w_sim, size)
plot_b = np.tile(b_sim, size)
# (Sw,1) x (1xN) --> (Sw,N) --reshape--> (Sw,N,1)
wx = np.dot(w_matrix, x_matrix).reshape(size, x.shape[0], 1)
# (Sw,N,1) + (1,Sb) --> (Sw,N,Sb) --transpose--> (Sw,Sb,N)
pred_sim = sigmoid((wx + b_matrix).transpose(0, 2, 1))
# ((Sw,Sb,N) - (1,N))^2 --> (Sw,Sb,N)
error_sim = pred_sim - y_matrix
squared_error_sim = error_sim**2
# Sum over the N samples: (Sw,Sb,N) --> (Sw,Sb)
cost_sim_mse = np.sum(squared_error_sim, axis=-1)/len(y) # Sum (100, 100, 8) in Dim 2 --> (100, 100)
plot_cost = cost_sim_mse.reshape(-1) # Flatten (100, 100) --> (10000,)
pd.DataFrame({'Gradient': plot_w, 'Intercept': plot_b, 'Cost': plot_cost}).head(5)
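The broadcasting above is easy to get wrong, so a quick shape check of each intermediate (purely illustrative) confirms the comments:
print('wx:', wx.shape)                      # (100, 8, 1)
print('pred_sim:', pred_sim.shape)          # (100, 100, 8) after the transpose
print('cost_sim_mse:', cost_sim_mse.shape)  # (100, 100)
print('plot_cost:', plot_cost.shape)        # (10000,)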
# 3D Visualization
out = widgets.Output(layout={'border': '1px solid black'})
out.append_stdout('Using Mean Squared Error gives us a Non-Convex Cost Curve')
display(out)
fig = plt.figure(figsize=(9,5))
ax = fig.add_subplot(111, projection='3d')
plot = ax.plot_surface(W, B, cost_sim_mse, cmap=cm.nipy_spectral)
cbar=plt.colorbar(plot)
cbar.set_label('\nCost', fontsize=20)
plt.title('Cost Curve\n', fontsize=20)
ax.set_xlabel('Gradient', fontsize=15)
ax.set_ylabel('Y-Intercept', fontsize=15)
ax.set_zlabel(' Cost', fontsize=15)
plt.show()
minimum_cost_index = np.argmin(plot_cost)
MSE_C = plot_cost[minimum_cost_index]
MSE_b = plot_b[minimum_cost_index]
MSE_w = plot_w[minimum_cost_index]
print('Minimum Index:', minimum_cost_index, \
'\nCost:', MSE_C, \
'\nY-Intercept:', MSE_b, \
'\nGradient:', MSE_w)
epsilon = 1e-8 # To Prevent -inf in log(pred)
# y*log(pred) + (1-y)*log(1-pred) --> (Sw,Sb,N)
error = ((y_matrix * np.log(pred_sim + epsilon)) + ((1-y_matrix) * np.log(1-pred_sim + epsilon)))
# Sum over the N samples: (Sw,Sb,N) --> (Sw,Sb)
cost_sim_ce = -np.sum(error, axis=-1)/len(y) # Sum (100, 100, 8) in Dim 2 --> (100, 100)
plot_cost = cost_sim_ce.reshape(-1) # Flatten (100, 100) --> (10000,)
# We add Epsilon because log(0) = -inf
# Which will give us error in further calculations
np.log(0)
pd.DataFrame({'Gradient': plot_w, 'Intercept': plot_b, 'Cost': plot_cost}).head(5)
pd.DataFrame({'Gradient': plot_w, 'Intercept': plot_b, 'Cost': plot_cost}).tail(5)
# 3D Visualization
out = widgets.Output(layout={'border': '1px solid black'})
out.append_stdout('Using Cross Entropy Loss gives us a Convex Cost Curve')
display(out)
fig = plt.figure(figsize=(9,5))
ax = fig.add_subplot(111, projection='3d')
plot = ax.plot_surface(W, B, cost_sim_ce, cmap=cm.nipy_spectral)
cbar=plt.colorbar(plot)
cbar.set_label('\nCost', fontsize=20)
plt.title('Cost Curve\n', fontsize=20)
ax.set_xlabel('Gradient', fontsize=15)
ax.set_ylabel('Y-Intercept', fontsize=15)
ax.set_zlabel(' Cost', fontsize=15)
plt.show()
minimum_cost_index = np.argmin(plot_cost)
CE_C = plot_cost[minimum_cost_index]
CE_b = plot_b[minimum_cost_index]
CE_w = plot_w[minimum_cost_index]
print('Minimum Index:', minimum_cost_index, \
'\nCost:', plot_cost[minimum_cost_index], \
'\nY-Intercept:', plot_b[minimum_cost_index], \
'\nGradient:', plot_w[minimum_cost_index])
plot_model('Best Variables from Mean Squared Error', init_weight=MSE_w, init_bias=MSE_b)
plot_model('Best Variables from Cross Entropy Loss', init_weight=CE_w, init_bias=CE_b)
def one_iteration(x, y, learning_rate=1e-1, first=False):
global theta, prev_theta
prev_theta = theta
X = np.vstack([np.ones(y.shape[0]), x])
# Model
pred = sigmoid(np.dot(theta, X))
# Calculations for Backpropagation
error = pred - y
cost = sum(error**2)/len(error)
    dcost_dtheta = np.array([np.sum(2 * Xi * error * pred * (1 - pred))/len(error) for Xi in X]) # chain rule through sigmoid: dpred/dz = pred*(1-pred)
theta = theta - (dcost_dtheta * learning_rate)
if len(x.shape) == 1:
data = "Cost: {}\nBias: {}\nWeight: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight: {}\n".format(cost, theta[0], theta[1], dcost_dtheta[0], dcost_dtheta[1])
elif len(x.shape) == 2:
data = "Cost: {}\nBias: {}\nWeight 1: {}\nWeight 2: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight 1: {}\nGradient of Cost Curve to Weight 2: {}\n"\
.format(cost, theta[0], theta[1], theta[2], dcost_dtheta[0], dcost_dtheta[1], dcost_dtheta[2])
return cost, dcost_dtheta, data
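The MSE update above follows from the chain rule; written out with the same names as the code (an explanatory sketch, not additional functionality):
# cost            = (1/N) * sum((pred - y)**2)     with pred = sigmoid(theta . X)
# dcost/dpred     = (2/N) * (pred - y)
# dpred/dz        = pred * (1 - pred)              # sigmoid'(z)
# dz/dtheta_j     = X_j
# dcost/dtheta_j  = (1/N) * sum(2 * (pred - y) * pred * (1 - pred) * X_j)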
def find_nearest(array, value):
idx = (np.abs(array - value)).argmin()
return idx
def update(iterations, nested=0):
# Run Iteration
if nested:
for i in range(nested):
cost, dcost_dtheta, data = one_iteration(x, y)
else:
cost, dcost_dtheta, data = one_iteration(x, y)
# Replot Cost Curves
b_nearest = find_nearest(b_sim, prev_theta[0])
w_nearest = find_nearest(w_sim, prev_theta[1])
curve_b.set_ydata(cost_sim_mse[w_nearest])
curve_w.set_ydata(cost_sim_mse[:, b_nearest])
# Replot Tangents
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
intercept_w = cost - (prev_theta[1] * dcost_dtheta[1])
tangent_b.set_ydata((dcost_dtheta[0]*b_sim)+intercept_b)
tangent_w.set_ydata((dcost_dtheta[1]*w_sim)+intercept_w)
# Replot Regression Line
line.set_ydata(model(theta[0], theta[1], line_x))
# Clear and Redraw
out.clear_output()
fig.canvas.draw()
out.append_stdout(data)
def update_ten(iterations):
if visualize.value:
for i in range(10):
update(1)
else:
update(1, nested=10)
def update_hundred(iterations):
if visualize.value:
for i in range(100):
update(1)
else:
update(1, nested=100)
##### Widgets #####
out = widgets.Output(layout={'border': '1px solid black'})
button_one = widgets.Button(description="1 Iteration")
button_one.on_click(update)
button_ten = widgets.Button(description="10 Iterations")
button_ten.on_click(update_ten)
button_hundred = widgets.Button(description="!!!100 Iterations!!!")
button_hundred.on_click(update_hundred)
visualize = widgets.Checkbox(value=False, description='Visualize Changes', disabled=False)
hbox = widgets.HBox([button_one, button_ten, button_hundred, visualize])
# Initialize Random Weight & Bias
theta = np.random.uniform(-8,8, size=2)
# theta = np.array([-5, 5])
# Run First Iteration
cost, dcost_dtheta, data = one_iteration(x, y)
b_nearest = find_nearest(b_sim, theta[0])
w_nearest = find_nearest(w_sim, theta[1])
############################
### Visualize Cost Curve ###
############################
fig = plt.figure(figsize=(9,6))
ax = fig.add_subplot(111)
plt.suptitle('Cost Curves (MSE)')
##### Bias Subplot #####
plt.subplot(2,2,1)
# plt.axis([-10.5,10.5,-500,2000])
plt.title('Bias')
plt.xlabel('Bias')
plt.ylabel('Cost')
# Cost Curve
curve_b, = plt.plot(b_sim, cost_sim_mse[w_nearest], label='Cost Curve')
x1, x2, y1, y2 = plt.axis()
plt.axis([x1,x2,-0.3,y2])
# Tangent
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
tangent_b, = plt.plot(b_sim, (dcost_dtheta[0]*b_sim)+intercept_b, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Weight Subplot #####
plt.subplot(2,2,2)
plt.title('Weight')
plt.xlabel('Weight')
# Cost Curve
curve_w, = plt.plot(w_sim, cost_sim_mse[:, b_nearest], label='Cost Curve')
x1, x2, y1, y2 = plt.axis()
plt.axis([x1,x2,-0.3,y2])
# Tangent
intercept_w = cost - (prev_theta[1] * dcost_dtheta[1])
tangent_w, = plt.plot(w_sim, (dcost_dtheta[1]*w_sim)+intercept_w, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Regression Line Subplot #####
plt.subplot(2,2,(3,4))
# plt.axis([-1,8,-3,10])
plt.title('Regression')
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.axhline(0.5, color='black', linewidth=0.5)
line_x = np.arange(-3, 8)
##### Data Points #####
plt.scatter(x, y, label='True Values', c='red')
##### Regression #####
line, = plt.plot(line_x, model(theta[0], theta[1], line_x), label='Logistic Regression Line', c='green')
plt.legend();
update(1)
display(hbox)
out
def one_iteration(x, y, learning_rate=1e-1, first=False):
global theta, prev_theta
prev_theta = theta
X = np.vstack([np.ones(y.shape[0]), x])
# Model
pred = sigmoid(np.dot(theta, X))
# Calculations for Backpropagation
error = ((y * np.log(pred + epsilon)) + ((1-y) * np.log(1-pred + epsilon)))
cost = - np.sum(error, len(error.shape)-1)/len(error)
dcost_dtheta = np.array([sum(Xi * (pred-y))/len(error) for Xi in X])
theta = theta - (dcost_dtheta * learning_rate)
if len(x.shape) == 1:
data = "Cost: {}\nBias: {}\nWeight: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight: {}\n".format(cost, theta[0], theta[1], dcost_dtheta[0], dcost_dtheta[1])
elif len(x.shape) == 2:
data = "Cost: {}\nBias: {}\nWeight 1: {}\nWeight 2: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight 1: {}\nGradient of Cost Curve to Weight 2: {}\n"\
.format(cost, theta[0], theta[1], theta[2], dcost_dtheta[0], dcost_dtheta[1], dcost_dtheta[2])
return cost, dcost_dtheta, data
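For cross entropy the sigmoid derivative cancels, which is why the gradient above is so clean (an explanatory sketch, same names as the code):
# cost           = -(1/N) * sum(y*log(pred) + (1-y)*log(1-pred))
# dcost/dpred    = (pred - y) / (pred * (1 - pred))   per sample, up to the 1/N factor
# dpred/dz       = pred * (1 - pred)                  # sigmoid'(z) -- cancels
# dcost/dtheta_j = (1/N) * sum((pred - y) * X_j)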
def find_nearest(array, value):
idx = (np.abs(array - value)).argmin()
return idx
def update(iterations, nested=0):
# Run Iteration
if nested:
for i in range(nested):
cost, dcost_dtheta, data = one_iteration(x, y)
else:
cost, dcost_dtheta, data = one_iteration(x, y)
# Replot Cost Curves
b_nearest = find_nearest(b_sim, prev_theta[0])
w_nearest = find_nearest(w_sim, prev_theta[1])
curve_b.set_ydata(cost_sim_ce[w_nearest])
curve_w.set_ydata(cost_sim_ce[:, b_nearest])
# Replot Tangents
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
intercept_w = cost - (prev_theta[1] * dcost_dtheta[1])
tangent_b.set_ydata((dcost_dtheta[0]*b_sim)+intercept_b)
tangent_w.set_ydata((dcost_dtheta[1]*w_sim)+intercept_w)
# Replot Regression Line
line.set_ydata(model(theta[0], theta[1], line_x))
# Clear and Redraw
out.clear_output()
fig.canvas.draw()
out.append_stdout(data)
def update_ten(iterations):
if visualize.value:
for i in range(10):
update(1)
else:
update(1, nested=10)
def update_hundred(iterations):
if visualize.value:
for i in range(100):
update(1)
else:
update(1, nested=100)
##### Widgets #####
out = widgets.Output(layout={'border': '1px solid black'})
button_one = widgets.Button(description="1 Iteration")
button_one.on_click(update)
button_ten = widgets.Button(description="10 Iterations")
button_ten.on_click(update_ten)
button_hundred = widgets.Button(description="!!!100 Iterations!!!")
button_hundred.on_click(update_hundred)
visualize = widgets.Checkbox(value=False, description='Visualize Changes', disabled=False)
hbox = widgets.HBox([button_one, button_ten, button_hundred, visualize])
# Initialize Random Weight & Bias
theta = np.random.uniform(-8,8, size=2)
# theta = np.array([-5, 5])
# Run First Iteration
cost, dcost_dtheta, data = one_iteration(x, y)
b_nearest = find_nearest(b_sim, theta[0])
w_nearest = find_nearest(w_sim, theta[1])
############################
### Visualize Cost Curve ###
############################
fig = plt.figure(figsize=(9,6))
ax = fig.add_subplot(111)
plt.suptitle('Cost Curves (Cross Entropy)')
##### Bias Subplot #####
plt.subplot(2,2,1)
# plt.axis([-10.5,10.5,-500,2000])
plt.title('Bias')
plt.xlabel('Bias')
plt.ylabel('Cost')
# Cost Curve
curve_b, = plt.plot(b_sim, cost_sim_ce[w_nearest], label='Cost Curve')
x1, x2, y1, y2 = plt.axis()
plt.axis([x1,x2,-1,y2])
# Tangent
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
tangent_b, = plt.plot(b_sim, (dcost_dtheta[0]*b_sim)+intercept_b, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Weight Subplot #####
plt.subplot(2,2,2)
plt.title('Weight')
plt.xlabel('Weight')
# Cost Curve
curve_w, = plt.plot(w_sim, cost_sim_ce[:, b_nearest], label='Cost Curve')
x1, x2, y1, y2 = plt.axis()
plt.axis([x1,x2,-1,y2])
# Tangent
intercept_w = cost - (prev_theta[1] * dcost_dtheta[1])
tangent_w, = plt.plot(w_sim, (dcost_dtheta[1]*w_sim)+intercept_w, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Regression Line Subplot #####
plt.subplot(2,2,(3,4))
# plt.axis([-1,8,-3,10])
plt.title('Regression')
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.axhline(0.5, color='black', linewidth=0.5)
line_x = np.arange(-3, 8)
##### Data Points #####
plt.scatter(x, y, label='True Values', c='red')
##### Regression #####
line, = plt.plot(line_x, model(theta[0], theta[1], line_x), label='Logistic Regression Line', c='green')
plt.legend();
update(1)
display(hbox)
out
4) Model with 3 Parameters (2 Gradients and Y-Intercept)
def model(b, w1, x1, w2, x2):
return sigmoid(b + (w1 * x1) + (w2 * x2))
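With two features, the 0.5 level set becomes a line in the (x1, x2) plane: b + w1*x1 + w2*x2 = 0. A quick check with illustrative values:
print(model(1.0, 2.0, 0.5, -1.0, 2.0))  # sigmoid(1 + 2*0.5 - 1*2) = sigmoid(0) = 0.5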
x1 = np.array([1., 2.8, 1.2, 2.7, 3.2, 2.4, 3.6, 4.6])
x1 = (x1 - x1.mean()) * 3
x2 = np.array([8.1, 6.7, 6.5, 3.2, 5.1, 4.9, 3.9, 4.1])
x2 = (x2 - x2.mean()) * 3
y = np.array([0, 0, 0, 1, 1, 0, 1, 1 ])
X = np.vstack([x1, x2])
fig = plt.figure(figsize=(4,3))
plt.title('x1 vs Y'); plt.xlabel('x1'); plt.ylabel('Y')
plt.scatter(x1, y, c='r')
plt.show()
fig = plt.figure(figsize=(4,3))
plt.title('x2 vs Y'); plt.xlabel('x2'); plt.ylabel('Y')
plt.scatter(x2, y, c='g')
plt.show()
size = 100
line_x1 = np.linspace(-5,5,size)
line_x2 = np.linspace(-5,5,size)
lx1, lx2 = np.meshgrid(line_x1, line_x2)
init = np.random.randn(3)
fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x1, x2, y, s=100, c='r')
z = model(init[0], init[1], lx1, init[2], lx2)
line = ax.plot_surface(lx1, lx2, z, color='green')
plt.suptitle('Model with 3 Variables', fontsize=15)
ax.set_xlabel('X1', fontsize=15)
ax.set_ylabel('X2', fontsize=15)
ax.set_zlabel('Y', fontsize=15)
def update(intercept=init[0], weight_1=init[1], weight_2=init[2]):
global ax, line
ax.clear()
ax.set_xlabel('X1', fontsize=15)
ax.set_ylabel('X2', fontsize=15)
ax.set_zlabel('Y', fontsize=15)
ax.scatter(x1, x2, y, s=100, c='r')
z = model(intercept, weight_1, lx1, weight_2, lx2)
line = ax.plot_surface(lx1, lx2, z, color='green')
fig.canvas.draw()
intercept = widgets.FloatText(value=init[0], description='Y-Intercept', step=0.1)
gradient_1 = widgets.FloatText(value=init[1], description='Gradient 1', step=0.1)
gradient_2 = widgets.FloatText(value=init[2], description='Gradient 2', step=0.1)
widgets.interactive(update, intercept=intercept, weight_1=gradient_1, weight_2=gradient_2)
4.1 Simulation for Visualization
size = 100
w1_raw = np.linspace(-15,15,size)
w2_raw = np.linspace(-15,15,size)
w1_sim = np.repeat(w1_raw, size)
w2_sim = np.tile(w2_raw, size)
b_sim = np.linspace(-20, 20, size)
w1_matrix = w1_sim.reshape(size*size, 1)
w2_matrix = w2_sim.reshape(size*size, 1)
b_matrix = b_sim.reshape(1, size)
x1_matrix = x1.reshape(1, x1.shape[0])
x2_matrix = x2.reshape(1, x2.shape[0])
y_matrix = y.reshape(1, y.shape[0])
plot_w1 = np.repeat(w1_raw, size*size)
plot_w2 = np.tile(np.repeat(w2_raw, size), size)
plot_b = np.tile(b_sim, size*size)
# (Sw1xSw2,2) x (2,N) --> (Sw1xSw2,N) --reshape--> (Sw1xSw2,N,1)
w_matrix = np.hstack([w1_matrix, w2_matrix])
x_matrix = np.vstack([x1_matrix, x2_matrix])
wx = np.dot(w_matrix, x_matrix).reshape(size*size, x1.shape[0], 1)
# (Sw1xSw2,N,1) + (1,Sb) --> (Sw1xSw2,N,Sb) --transpose--> (Sw1xSw2,Sb,N)
pred_sim = sigmoid((wx + b_matrix).transpose(0, 2, 1))
# ((Sw1xSw2,Sb,N) - (1,N))^2 --> (Sw1xSw2,Sb,N)
error_sim = pred_sim - y_matrix
squared_error_sim = error_sim**2
print(squared_error_sim.shape)
# Sum (Sw1xSw2,Sb,N) in Dim 2 --> (Sw1xSw2,Sb)
cost_sim_mse = np.sum(squared_error_sim, axis=-1)/len(y)
# Flatten (Sw1xSw2,Sb) --> (Sw1xSw2*Sb,)
plot_cost = cost_sim_mse.reshape(-1) # Flatten (10000, 100) --> (1000000,)
pd.DataFrame({'Gradient_1': plot_w1, 'Gradient_2': plot_w2, 'Intercept': plot_b, 'Cost': plot_cost}).head(5)
# 3D Visualization
fig = plt.figure(figsize=(9,6))
ax = fig.add_subplot(111, projection='3d')
plot = ax.scatter(plot_w1, plot_w2, plot_b, c=plot_cost, cmap='nipy_spectral')
cbar=plt.colorbar(plot)
cbar.set_label('\nCost', fontsize=20)
plt.title('Cost Curve (MSE)\n', fontsize=30)
ax.set_xlabel('Gradient 1 (Theta 1)', fontsize=15)
ax.set_ylabel('Gradient 2 (Theta 2)', fontsize=15)
ax.set_zlabel('Y-Intercept (Theta 0)', fontsize=15)
plt.show()
minimum_cost_index = np.argmin(plot_cost)
print('Minimum Index:', minimum_cost_index, \
'\nCost:', plot_cost[minimum_cost_index], \
'\nY-Intercept:', plot_b[minimum_cost_index], \
'\nGradient 1:', plot_w1[minimum_cost_index], \
'\nGradient 2:', plot_w2[minimum_cost_index])
epsilon = 1e-8 # To Prevent -inf in log(pred)
# y*log(pred) + (1-y)*log(1-pred) --> (Sw1xSw2,Sb,N)
error = ((y_matrix * np.log(pred_sim + epsilon)) + ((1-y_matrix) * np.log(1-pred_sim + epsilon)))
# Sum over the N samples: (Sw1xSw2,Sb,N) --> (Sw1xSw2,Sb)
cost_sim_ce = -np.sum(error, axis=-1)/len(y)
plot_cost = cost_sim_ce.reshape(-1) # Flatten (10000, 100) --> (1000000,)
# 3D Visualization
fig = plt.figure(figsize=(9,6))
ax = fig.add_subplot(111, projection='3d')
plot = ax.scatter(plot_w1, plot_w2, plot_b, c=plot_cost, cmap='nipy_spectral')
cbar=plt.colorbar(plot)
cbar.set_label('\nCost', fontsize=20)
plt.title('Cost Curve (Cross Entropy)\n', fontsize=30)
ax.set_xlabel('Gradient 1 (Theta 1)', fontsize=15)
ax.set_ylabel('Gradient 2 (Theta 2)', fontsize=15)
ax.set_zlabel('Y-Intercept (Theta 0)', fontsize=15)
plt.show()
minimum_cost_index = np.argmin(plot_cost)
print('Minimum Index:', minimum_cost_index, \
'\nCost:', plot_cost[minimum_cost_index], \
'\nY-Intercept:', plot_b[minimum_cost_index], \
'\nGradient 1:', plot_w1[minimum_cost_index], \
'\nGradient 2:', plot_w2[minimum_cost_index])
def one_iteration(x, y, learning_rate=1e-1, first=False):
global theta, prev_theta
prev_theta = theta
X = np.vstack([np.ones(y.shape[0]), x])
# Model
pred = sigmoid(np.dot(theta, X))
# Calculations for Backpropagation
error = pred - y
cost = sum(error**2)/len(error)
    dcost_dtheta = np.array([np.sum(2 * Xi * error * pred * (1 - pred))/len(error) for Xi in X]) # chain rule through sigmoid: dpred/dz = pred*(1-pred)
theta = theta - (dcost_dtheta * learning_rate)
if len(x.shape) == 1:
data = "Cost: {}\nBias: {}\nWeight: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight: {}\n".format(cost, theta[0], theta[1], dcost_dtheta[0], dcost_dtheta[1])
elif len(x.shape) == 2:
data = "Cost: {}\nBias: {}\nWeight 1: {}\nWeight 2: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight 1: {}\nGradient of Cost Curve to Weight 2: {}\n"\
.format(cost, theta[0], theta[1], theta[2], dcost_dtheta[0], dcost_dtheta[1], dcost_dtheta[2])
return cost, dcost_dtheta, data
def find_nearest(array, value):
idx = (np.abs(array - value)).argmin()
return idx
def match(array, value):
cond = array == value
return np.where(cond)[0]
def match_2(array1, array2, value1, value2):
cond1 = array1 == value1
cond2 = array2 == value2
a = np.where(cond1)[0]
b = np.where(cond2)[0]
return a[np.where(np.in1d(a, b))][0]
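A tiny illustration of match and match_2 on toy arrays (hypothetical values, just to show which indices they return):
demo1 = np.array([1., 2., 2., 3.])
demo2 = np.array([5., 6., 7., 6.])
print(match(demo1, 2.))               # [1 2] -- all indices where demo1 == 2
print(match_2(demo1, demo2, 2., 6.))  # 1 -- first index where both conditions hold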
def update(iterations, nested=0):
# Run Iteration
if nested:
for i in range(nested):
cost, dcost_dtheta, data = one_iteration(X, y)
else:
cost, dcost_dtheta, data = one_iteration(X, y)
# Replot Cost Curves
b_nearest = find_nearest(b_sim, prev_theta[0])
w1_nearest = find_nearest(w1_raw, prev_theta[1])
w2_nearest = find_nearest(w2_raw, prev_theta[2])
curve_b.set_ydata(cost_sim_mse[match_2(w1_sim, w2_sim, w1_raw[w1_nearest], w2_raw[w2_nearest])])
curve_w1.set_ydata(cost_sim_mse[match(w2_sim, w2_raw[w2_nearest]), b_nearest])
curve_w2.set_ydata(cost_sim_mse[match(w1_sim, w1_raw[w1_nearest]), b_nearest])
# Replot Tangents
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
intercept_w1 = cost - (prev_theta[1] * dcost_dtheta[1])
intercept_w2 = cost - (prev_theta[2] * dcost_dtheta[2])
tangent_b.set_ydata((dcost_dtheta[0]*b_sim)+intercept_b)
tangent_w1.set_ydata((dcost_dtheta[1]*w1_raw)+intercept_w1)
tangent_w2.set_ydata((dcost_dtheta[2]*w2_raw)+intercept_w2)
# Clear and Redraw
out.clear_output()
out.append_stdout(data)
fig.canvas.draw()
def update_ten(iterations):
if visualize.value:
for i in range(10):
update(1)
else:
update(1, nested=10)
def update_hundred(iterations):
if visualize.value:
for i in range(100):
update(1)
else:
update(1, nested=100)
def update_thousand(iterations):
if visualize.value:
        print('Not allowed; deselect "Visualize Changes" to proceed.')
else:
update(1, nested=1000)
##### Widgets #####
out = widgets.Output(layout={'border': '1px solid black'})
button_one = widgets.Button(description="1 Iteration")
button_one.on_click(update)
button_ten = widgets.Button(description="10 Iterations")
button_ten.on_click(update_ten)
button_hundred = widgets.Button(description="!!!100 Iterations!!!")
button_hundred.on_click(update_hundred)
button_thousand = widgets.Button(description="!!!Warning - 1000!!!")
button_thousand.on_click(update_thousand)
visualize = widgets.Checkbox(value=False, description='Visualize Changes', disabled=False)
hbox = widgets.HBox([button_one, button_ten, button_hundred, button_thousand, visualize])
# Initialize Random Weights & Bias
theta = np.random.uniform(-5,5,size=3)
# Run First Iteration
cost, dcost_dtheta, data = one_iteration(X, y)
b_nearest = find_nearest(b_sim, prev_theta[0])
w1_nearest = find_nearest(w1_raw, prev_theta[1])
w2_nearest = find_nearest(w2_raw, prev_theta[2])
############################
### Visualize Cost Curve ###
############################
fig = plt.figure(figsize=(10.5,5))
ax = fig.add_subplot(111)
plt.suptitle('Cost Curves (MSE)')
##### Bias Subplot #####
plt.subplot(1,3,1)
plt.title('Bias')
plt.xlabel('Bias')
plt.ylabel('Cost')
# Cost Curve
curve_b, = plt.plot(b_sim, cost_sim_mse[match_2(w1_sim, w2_sim, w1_raw[w1_nearest], w2_raw[w2_nearest])], label='Cost Curve')
d1,d2,d3,d4 = plt.axis()
# plt.axis([d1,d2,-1,10])
# Tangent
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
tangent_b, = plt.plot(b_sim, (dcost_dtheta[0]*b_sim)+intercept_b, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Weight 1 Subplot #####
plt.subplot(1,3,2)
# plt.axis([-30.5,30.5,-1000,1000])
plt.title('Weight 1')
plt.xlabel('Weight 1')
# Cost Curve
curve_w1, = plt.plot(w1_raw, cost_sim_mse[match(w2_sim, w2_raw[w2_nearest]), b_nearest], label='Cost Curve')
d1,d2,d3,d4 = plt.axis()
# plt.axis([d1,d2,-1,12])
# Tangent
intercept_w1 = cost - (prev_theta[1] * dcost_dtheta[1])
tangent_w1, = plt.plot(w1_raw, (dcost_dtheta[1]*w1_raw)+intercept_w1, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Weight 2 Subplot #####
plt.subplot(1,3,3)
plt.title('Weight 2')
plt.xlabel('Weight 2')
# Cost Curve
curve_w2, = plt.plot(w2_raw, cost_sim_mse[match(w1_sim, w1_raw[w1_nearest]), b_nearest], label='Cost Curve')
d1,d2,d3,d4 = plt.axis()
# plt.axis([d1,d2,-1,12])
# Tangent
intercept_w2 = cost - (prev_theta[2] * dcost_dtheta[2])
tangent_w2, = plt.plot(w2_raw, (dcost_dtheta[2]*w2_raw)+intercept_w2, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
update(1)
display(hbox)
out
def update(iterations, nested=1):
for _ in range(nested):
cost, dcost_dtheta, data = one_iteration(X, y)
ax.clear()
ax.set_xlabel('x1', fontsize=15)
ax.set_ylabel('x2', fontsize=15)
ax.set_zlabel('Y', fontsize=15)
ax.scatter(x1, x2, y, s=100, c='r')
z = model(theta[0], theta[1], lx1, theta[2], lx2)
line = ax.plot_surface(lx1, lx2, z, color='green')
# line = ax.scatter(line_x_1, line_x_2, z, s=5, c=z)
out.clear_output()
out.append_stdout(data)
fig.canvas.draw()
def update_ten(iterations):
if visualize.value:
for i in range(10):
update(1)
else:
update(1, nested=10)
def update_hundred(iterations):
if visualize.value:
for i in range(100):
update(1)
else:
update(1, nested=100)
def update_ten_thousand(iterations):
if visualize.value:
        print('Not allowed; deselect "Visualize Changes" to proceed.')
else:
update(1, nested=10000)
##### Widgets #####
out = widgets.Output(layout={'border': '1px solid black'})
button_one = widgets.Button(description="1 Iteration")
button_one.on_click(update)
button_ten = widgets.Button(description="10 Iterations")
button_ten.on_click(update_ten)
button_hundred = widgets.Button(description="!!!100 Iterations!!!")
button_hundred.on_click(update_hundred)
button_ten_thousand = widgets.Button(description="!!!Warning - 10000!!!")
button_ten_thousand.on_click(update_ten_thousand)
visualize = widgets.Checkbox(value=False, description='Visualize Changes', disabled=False)
hbox = widgets.HBox([button_one, button_ten, button_hundred, button_ten_thousand, visualize])
# Initialize Random Weights & Bias
theta = np.random.uniform(-5,5,size=3)
line_x_1 = np.repeat(np.linspace(-5,5,size), size)
line_x_2 = np.tile(np.linspace(-5,5,size), size)
##################################
### Visualize Regression Plane ###
##################################
fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111, projection='3d')
plt.suptitle('Regression Plane', fontsize=15)
ax.set_xlabel('x1', fontsize=15)
ax.set_ylabel('x2', fontsize=15)
ax.set_zlabel('Y', fontsize=15)
ax.scatter(x1, x2, y, s=100, c='r')
z = model(theta[0], theta[1], lx1, theta[2], lx2)
line = ax.plot_surface(lx1, lx2, z, color='green')
update(1)
display(hbox)
out
def one_iteration(x, y, learning_rate=1e-1, first=False):
global theta, prev_theta
prev_theta = theta
X = np.vstack([np.ones(y.shape[0]), x])
# Model
pred = sigmoid(np.dot(theta, X))
# Calculations for Backpropagation
error = ((y * np.log(pred + epsilon)) + ((1-y) * np.log(1-pred + epsilon)))
cost = - np.sum(error, len(error.shape)-1)/len(error)
dcost_dtheta = np.array([sum(Xi * (pred-y))/len(error) for Xi in X])
theta = theta - (dcost_dtheta * learning_rate)
if len(x.shape) == 1:
data = "Cost: {}\nBias: {}\nWeight: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight: {}\n".format(cost, theta[0], theta[1], dcost_dtheta[0], dcost_dtheta[1])
elif len(x.shape) == 2:
data = "Cost: {}\nBias: {}\nWeight 1: {}\nWeight 2: {}\nGradient of Cost Curve to Bias: {}\
\nGradient of Cost Curve to Weight 1: {}\nGradient of Cost Curve to Weight 2: {}\n"\
.format(cost, theta[0], theta[1], theta[2], dcost_dtheta[0], dcost_dtheta[1], dcost_dtheta[2])
return cost, dcost_dtheta, data
def find_nearest(array, value):
idx = (np.abs(array - value)).argmin()
return idx
def match(array, value):
cond = array == value
return np.where(cond)[0]
def match_2(array1, array2, value1, value2):
cond1 = array1 == value1
cond2 = array2 == value2
a = np.where(cond1)[0]
b = np.where(cond2)[0]
return a[np.where(np.in1d(a, b))][0]
def update(iterations, nested=0):
# Run Iteration
if nested:
for i in range(nested):
cost, dcost_dtheta, data = one_iteration(X, y)
else:
cost, dcost_dtheta, data = one_iteration(X, y)
# Replot Cost Curves
b_nearest = find_nearest(b_sim, prev_theta[0])
w1_nearest = find_nearest(w1_raw, prev_theta[1])
w2_nearest = find_nearest(w2_raw, prev_theta[2])
curve_b.set_ydata(cost_sim_ce[match_2(w1_sim, w2_sim, w1_raw[w1_nearest], w2_raw[w2_nearest])])
curve_w1.set_ydata(cost_sim_ce[match(w2_sim, w2_raw[w2_nearest]), b_nearest])
curve_w2.set_ydata(cost_sim_ce[match(w1_sim, w1_raw[w1_nearest]), b_nearest])
# Replot Tangents
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
intercept_w1 = cost - (prev_theta[1] * dcost_dtheta[1])
intercept_w2 = cost - (prev_theta[2] * dcost_dtheta[2])
tangent_b.set_ydata((dcost_dtheta[0]*b_sim)+intercept_b)
tangent_w1.set_ydata((dcost_dtheta[1]*w1_raw)+intercept_w1)
tangent_w2.set_ydata((dcost_dtheta[2]*w2_raw)+intercept_w2)
# Clear and Redraw
out.clear_output()
out.append_stdout(data)
fig.canvas.draw()
def update_ten(iterations):
if visualize.value:
for i in range(10):
update(1)
else:
update(1, nested=10)
def update_hundred(iterations):
if visualize.value:
for i in range(100):
update(1)
else:
update(1, nested=100)
def update_thousand(iterations):
if visualize.value:
        print('Not allowed; deselect "Visualize Changes" to proceed.')
else:
update(1, nested=1000)
##### Widgets #####
out = widgets.Output(layout={'border': '1px solid black'})
button_one = widgets.Button(description="1 Iteration")
button_one.on_click(update)
button_ten = widgets.Button(description="10 Iterations")
button_ten.on_click(update_ten)
button_hundred = widgets.Button(description="!!!100 Iterations!!!")
button_hundred.on_click(update_hundred)
button_thousand = widgets.Button(description="!!!Warning - 1000!!!")
button_thousand.on_click(update_thousand)
visualize = widgets.Checkbox(value=False, description='Visualize Changes', disabled=False)
hbox = widgets.HBox([button_one, button_ten, button_hundred, button_thousand, visualize])
# Initialize Random Weights & Bias
theta = np.random.uniform(-5,5,size=3)
# Run First Iteration
cost, dcost_dtheta, data = one_iteration(X, y)
b_nearest = find_nearest(b_sim, prev_theta[0])
w1_nearest = find_nearest(w1_raw, prev_theta[1])
w2_nearest = find_nearest(w2_raw, prev_theta[2])
############################
### Visualize Cost Curve ###
############################
fig = plt.figure(figsize=(10.5,5))
ax = fig.add_subplot(111)
plt.suptitle('Cost Curves')
##### Bias Subplot #####
plt.subplot(1,3,1)
plt.title('Bias')
plt.xlabel('Bias')
plt.ylabel('Cost')
# Cost Curve
curve_b, = plt.plot(b_sim, cost_sim_ce[match_2(w1_sim, w2_sim, w1_raw[w1_nearest], w2_raw[w2_nearest])], label='Cost Curve')
d1,d2,d3,d4 = plt.axis()
plt.axis([-20,20,-5,15])
# Tangent
intercept_b = cost - (prev_theta[0] * dcost_dtheta[0])
tangent_b, = plt.plot(b_sim, (dcost_dtheta[0]*b_sim)+intercept_b, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Weight 1 Subplot #####
plt.subplot(1,3,2)
# plt.axis([-30.5,30.5,-1000,1000])
plt.title('Weight 1')
plt.xlabel('Weight 1')
# Cost Curve
curve_w1, = plt.plot(w1_raw, cost_sim_ce[match(w2_sim, w2_raw[w2_nearest]), b_nearest], label='Cost Curve')
d1,d2,d3,d4 = plt.axis()
plt.axis([-15,15,-5,15])
# Tangent
intercept_w1 = cost - (prev_theta[1] * dcost_dtheta[1])
tangent_w1, = plt.plot(w1_raw, (dcost_dtheta[1]*w1_raw)+intercept_w1, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
##### Weight 2 Subplot #####
plt.subplot(1,3,3)
plt.title('Weight 2')
plt.xlabel('Weight 2')
# Cost Curve
curve_w2, = plt.plot(w2_raw, cost_sim_ce[match(w1_sim, w1_raw[w1_nearest]), b_nearest], label='Cost Curve')
d1,d2,d3,d4 = plt.axis()
# plt.axis([d1,d2,-1,12])
# Tangent
intercept_w2 = cost - (prev_theta[2] * dcost_dtheta[2])
tangent_w2, = plt.plot(w2_raw, (dcost_dtheta[2]*w2_raw)+intercept_w2, label='Gradient (Tangent Line)', linestyle='--')
plt.legend();
update(1)
display(hbox)
out
def update(iterations, nested=1):
for _ in range(nested):
cost, dcost_dtheta, data = one_iteration(X, y)
ax.clear()
ax.set_xlabel('x1', fontsize=15)
ax.set_ylabel('x2', fontsize=15)
ax.set_zlabel('Y', fontsize=15)
ax.scatter(x1, x2, y, s=100, c='r')
z = model(theta[0], theta[1], lx1, theta[2], lx2)
line = ax.plot_surface(lx1, lx2, z, color='green')
out.clear_output()
out.append_stdout(data)
fig.canvas.draw()
def update_ten(iterations):
if visualize.value:
for i in range(10):
update(1)
else:
update(1, nested=10)
def update_hundred(iterations):
if visualize.value:
for i in range(100):
update(1)
else:
update(1, nested=100)
def update_ten_thousand(iterations):
if visualize.value:
        print('Not allowed; deselect "Visualize Changes" to proceed.')
else:
update(1, nested=10000)
##### Widgets #####
out = widgets.Output(layout={'border': '1px solid black'})
button_one = widgets.Button(description="1 Iteration")
button_one.on_click(update)
button_ten = widgets.Button(description="10 Iterations")
button_ten.on_click(update_ten)
button_hundred = widgets.Button(description="!!!100 Iterations!!!")
button_hundred.on_click(update_hundred)
button_ten_thousand = widgets.Button(description="!!!Warning - 10000!!!")
button_ten_thousand.on_click(update_ten_thousand)
visualize = widgets.Checkbox(value=False, description='Visualize Changes', disabled=False)
hbox = widgets.HBox([button_one, button_ten, button_hundred, button_ten_thousand, visualize])
# Initialize Random Weights & Bias
theta = np.random.uniform(-5,5,size=3)
line_x_1 = np.repeat(np.linspace(-5,5,size), size)
line_x_2 = np.tile(np.linspace(-5,5,size), size)
##################################
### Visualize Regression Plane ###
##################################
fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111, projection='3d')
plt.suptitle('Logistic Regression Plane', fontsize=15)
ax.set_xlabel('x1', fontsize=15)
ax.set_ylabel('x2', fontsize=15)
ax.set_zlabel('Y', fontsize=15)
ax.scatter(x1, x2, y, s=100, c='r')
z = model(theta[0], theta[1], lx1, theta[2], lx2)
line = ax.plot_surface(lx1, lx2, z, color='green')
update(1)
display(hbox)
out