import numpy as np # Linear Algebra
import pandas as pd # Data Frames
import matplotlib.pyplot as plt # Visualization
from mpl_toolkits.mplot3d import axes3d # 3D Visualization
import ipywidgets as widgets # Interactivity
from IPython.display import display # Display Widgets
%matplotlib notebook
import sklearn.datasets as datasets
import time # To Track Time
# Load the Boston housing dataset.
# NOTE(review): sklearn.datasets.load_boston was deprecated in scikit-learn 1.0
# and removed in 1.2 — this code requires an older scikit-learn release.
boston = datasets.load_boston()
boston.keys()
print("Feature Names:\n", boston['feature_names'])
boston['data'].shape, boston['target'].shape

# Peek at features and target as pandas objects.
df = pd.DataFrame(boston['data'], columns=boston['feature_names'])
df.head()
pd.Series(boston['target']).head()

# Hold out the tail of the dataset as a validation split.
split = 450
X, X_val = boston['data'][:split], boston['data'][split:]
Y, Y_val = boston['target'][:split], boston['target'][split:]
def model(theta, X):
    """Linear model: return the matrix product theta · X (vectorized)."""
    return theta @ X
def train(x, y, learning_rate=3e-6, iterations=1, first=False):
    """Run `iterations` steps of batch gradient descent on the global theta.

    x: feature matrix, shape (n_features, n_samples)
    y: target vector, shape (n_samples,)
    Returns (cost, gradient) from the final iteration.
    NOTE(review): `first` is accepted for backward compatibility but never used.
    """
    global theta, prev_theta
    prev_theta = theta
    # Prepend a row of ones so theta[0] acts as the bias term.
    X = np.vstack([np.ones(y.shape[0]), x])
    for _ in range(iterations):
        pred = model(theta, X)        # forward pass
        error = pred - y
        cost = np.mean(error ** 2)    # mean squared error
        # Gradient of the MSE w.r.t. theta (up to a constant factor of 2,
        # which is absorbed into the learning rate).
        grad = np.mean(X * error, 1)
        theta = theta - grad * learning_rate
    return cost, grad
# Random initialization: one weight per feature plus a bias term.
theta = np.random.randn(X.shape[1] + 1)
print(theta.shape)

epochs = 25
total_time = time.time()
start = time.time()
for epoch in range(1, epochs + 1):
    # Step down the learning rate after epoch 15.
    lr = 5e-6 if epoch <= 15 else 2e-6
    cost, dcost_dtheta = train(X.T, Y, learning_rate=lr, iterations=30000)
    print('Epoch {} - Cost: {:.3f}\nTime: {:.2f}s\n'.format(epoch, cost, time.time()-start))
    start = time.time()
print('Total Time Taken: {:.2f}s'.format(time.time()-total_time))
def _with_bias(features, targets):
    """Stack a bias row of ones on top of the transposed feature matrix."""
    return np.vstack([np.ones(targets.shape[0]), features.T])

# MSE of the hand-trained model on the training split.
Xs = _with_bias(X, Y)
modelpred = model(theta, Xs)
np.mean((modelpred - Y) ** 2)  # Mean Squared Error

# MSE on the held-out validation split.
Xs = _with_bias(X_val, Y_val)
modelpred = model(theta, Xs)
np.mean((modelpred - Y_val) ** 2)  # Mean Squared Error
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least squares via scikit-learn.
# FIX: the original assigned this to `model`, shadowing (and destroying)
# the hand-rolled model() function defined above; renamed to `lin_reg`.
lin_reg = LinearRegression()
lin_reg.fit(X, Y)
lin_reg.score(X, Y)          # R^2 on the training split
lin_reg.score(X_val, Y_val)  # R^2 on the validation split

skpred = lin_reg.predict(X)
np.mean((skpred - Y) ** 2)       # training MSE
skpred = lin_reg.predict(X_val)
np.mean((skpred - Y_val) ** 2)   # validation MSE
# Bootstrap aggregation: draw resamples of the training data and fit
# one linear model per resample.
num_bags = 250   # number of bootstrap resamples
bag_size = 150   # rows drawn (with replacement) per resample
bags = []
for _ in range(num_bags):
    sample_idx = np.random.choice(np.arange(X.shape[0]), bag_size)
    bags.append([X[sample_idx], Y[sample_idx]])

# One LinearRegression fitted per bag (fit returns the estimator itself).
models = []
for features, targets in bags:
    models.append(LinearRegression().fit(features, targets))
# Ensemble prediction = mean of the per-bag predictions.
# Training-split MSE:
avg_preds = np.mean([m.predict(X) for m in models], axis=0)
np.mean((avg_preds - Y) ** 2)

# Validation-split MSE:
avg_preds = np.mean([m.predict(X_val) for m in models], axis=0)
np.mean((avg_preds - Y_val) ** 2)
from sklearn.linear_model import Ridge

# L2-regularized linear regression (default alpha): training MSE.
# Ridge.fit returns the fitted estimator, so the calls chain.
ridge_model = Ridge().fit(X, Y)
ridge_pred = ridge_model.predict(X)
np.mean((ridge_pred - Y) ** 2)
from sklearn.linear_model import Lasso

# L1-regularized linear regression (default alpha): training MSE.
lasso_model = Lasso().fit(X, Y)
lasso_pred = lasso_model.predict(X)
np.mean((lasso_pred - Y) ** 2)
from sklearn.svm import SVR

# Support-vector regression with default parameters: training MSE.
SVM = SVR().fit(X, Y)
SVM_pred = SVM.predict(X)
np.mean((SVM_pred - Y) ** 2)
from sklearn.tree import DecisionTreeRegressor

# Decision-tree regressor with default parameters: training MSE.
dec_tree = DecisionTreeRegressor().fit(X, Y)
dec_tree_pred = dec_tree.predict(X)
np.mean((dec_tree_pred - Y) ** 2)
from sklearn.ensemble import GradientBoostingRegressor

# Gradient-boosted trees with default parameters: training MSE.
GBR = GradientBoostingRegressor().fit(X, Y)
GBR_pred = GBR.predict(X)
np.mean((GBR_pred - Y) ** 2)