import numpy as np
import matplotlib.pyplot as plt
# Quadratic function and its derivative
def func(x):
    return x**2 + 20

def gradient_at(x):
    return 2*x

x = np.arange(50) / 10  # 0.0 to 4.9 in steps of 0.1
fig = plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.title('Function', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, func(x), label='Function', c='r')
plt.subplot(1,2,2)
plt.title('Gradient', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, gradient_at(x), label='Gradient', c='g')
plt.show()
gradient_at(4)
# Exponential function and its derivative
def func(x):
    return np.exp(2*x)

def gradient_at(x):
    return 2 * np.exp(2*x)

x = np.arange(50) / 10
fig = plt.figure(figsize=(12,6))
plt.subplot(1,2,1); plt.title('Function', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, func(x), label='Function', c='r')
plt.subplot(1,2,2); plt.title('Gradient', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, gradient_at(x), label='Gradient', c='g')
plt.show()
# Natural log function and its derivative; shift x by 0.1 to avoid log(0)
def func(x):
    return np.log(5*x)

def gradient_at(x):
    return 1/x

x = np.arange(50) / 10 + 0.1
fig = plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.title('Function', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, func(x), label='Function', c='r')
plt.subplot(1,2,2)
plt.title('Gradient', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, gradient_at(x), label='Gradient', c='g')
plt.show()
# Quadratic with a minimum at x = 4.5, where the gradient 2x - 9 crosses zero
def func(x):
    return x**2 - 9*x + 25

def gradient_at(x):
    return 2*x - 9

x = np.arange(100) / 10  # 0.0 to 9.9
fig = plt.figure(figsize=(12,6))
plt.subplot(1,2,1); plt.axis([-0.5, 10.5, 0, 35])
plt.title('Function', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, func(x), label='Function', c='r')
plt.subplot(1,2,2)
plt.title('Gradient', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.axhline(0, c='black', linewidth=0.5)
plt.plot(x, gradient_at(x), label='Gradient', c='g')
plt.show()
gradient_at(5)
# Invert the gradient function: solve 2x - 9 = gradient for x
def find_x(gradient):
    return (gradient + 9) / 2

find_x(0)  # x where the gradient is zero, i.e. the minimum: 4.5
x = np.arange(100) / 10
fig = plt.figure(figsize=(12,6))
plt.subplot(1,2,1); plt.axis([-0.5, 10.5, 0, 35])
plt.title('Function', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.plot(x, func(x), label='Function', c='r')
plt.axhline(func(find_x(0)), c='black', linestyle='--', linewidth=1)
plt.subplot(1,2,2)
plt.title('Gradient', fontsize=20); plt.xlabel('x', fontsize=15); plt.ylabel('y', fontsize=15)
plt.axhline(0, c='black', linewidth=0.5)
plt.plot(x, gradient_at(x), label='Gradient', c='g')
plt.show()
# Minimum Point:
func(find_x(0))  # = func(4.5) = 4.75
Which is to say, we can find the point of Minimum Error through Differentiation: we look for the point where the Gradient of the Cost Function with respect to each of the Parameters is $\approx 0$.
Differentiating the Error Function with respect to each Parameter requires the application of the Chain Rule.
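For example (a minimal sketch, assuming a squared-error Cost $L = (\hat{y} - y)^2$ with a linear prediction $\hat{y} = wx + b$; these symbols are illustrative and not from the code above), the Chain Rule splits each derivative into pieces we already know how to compute:

$$\frac{\partial L}{\partial w} = \frac{\partial L}{\partial \hat{y}} \cdot \frac{\partial \hat{y}}{\partial w} = 2(\hat{y} - y)\,x, \qquad \frac{\partial L}{\partial b} = \frac{\partial L}{\partial \hat{y}} \cdot \frac{\partial \hat{y}}{\partial b} = 2(\hat{y} - y)$$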
The Jacobian Matrix is simply the term for the Matrix of Gradients (the partial derivatives of the Cost with respect to each Parameter).
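As a sketch, for a Cost Function $L(w_1, w_2, \dots, w_n)$ with a single output, the Jacobian reduces to a single row, one partial derivative per Parameter:

$$J = \begin{bmatrix} \dfrac{\partial L}{\partial w_1} & \dfrac{\partial L}{\partial w_2} & \cdots & \dfrac{\partial L}{\partial w_n} \end{bmatrix}$$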
Since we are doing Machine Learning, we can Randomly Initialize the weights and let the machine learn the Optimal Value of each Parameter.
We can adjust our randomly initialized Parameters towards the Optimal Point by utilizing the Jacobian Matrix (the Gradients of the Cost Curve with respect to each Parameter).
Gradient is POSITIVE: we want to DECREASE the value of the Parameter. Since we want to decrease the Parameter, we can subtract a positive number from it, and guess what number is positive: the Gradient.
Gradient is NEGATIVE: we want to INCREASE the value of the Parameter. Since we want to increase the Parameter, we can subtract a negative number from it (subtracting a negative number is the same as adding a positive number), and guess what number is negative: the Gradient.
If the Gradient is Positive, it means that the Parameter is too Large, and we can Reduce it by Subtracting the Positive Gradient.
If the Gradient is Negative, it means that the Parameter is too Small, and we can Increase it by Subtracting the Negative Gradient.
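As a quick check (reusing gradient_at as last defined above, for $x^2 - 9x + 25$ with its minimum at $x = 4.5$), subtracting the raw Gradient does push the Parameter in the correct direction from either side, although it can overshoot the minimum:

# Parameter too large: Gradient is positive, so subtracting it DECREASES the parameter
x_big = 7.0
print(gradient_at(x_big))           # 2*7 - 9 = 5.0  (positive)
print(x_big - gradient_at(x_big))   # 7 - 5 = 2.0    (pushed down, past the minimum at 4.5)

# Parameter too small: Gradient is negative, so subtracting it INCREASES the parameter
x_small = 2.0
print(gradient_at(x_small))             # 2*2 - 9 = -5.0  (negative)
print(x_small - gradient_at(x_small))   # 2 - (-5) = 7.0  (pushed up, past the minimum at 4.5)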
However, Gradients can be HUGE, and subtracting them directly can throw the Parameters WAY OFF, so we scale the Gradient by a small factor before subtracting it.
This small scaling factor is A.K.A. the Learning Rate, which is a Hyperparameter of the Training Algorithm.
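A minimal sketch of the resulting update rule, again reusing gradient_at for $x^2 - 9x + 25$ (the learning_rate of 0.1 and the 50 steps are arbitrary illustrative choices, not prescribed values):

def gradient_descent(start_x, learning_rate=0.1, steps=50):
    # Repeatedly subtract the scaled Gradient from the parameter
    x = start_x
    for _ in range(steps):
        x = x - learning_rate * gradient_at(x)
    return x

print(gradient_descent(9.0))  # approaches 4.5, where the Gradient is ~0
print(gradient_descent(0.0))  # the same minimum, approached from the other side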