Machine Learning Programming Workshop

3.3 TensorFlow/Keras (Computer Vision)

Prepared By: Cheong Shiu Hong (FTFNCE)



In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
In [2]:
import tensorflow as tf
import tensorflow.keras as K


Versions of TensorFlow and Keras

In [3]:
tf.__version__
Out[3]:
'2.0.0-alpha0'
In [4]:
K.__version__ # the '-tf' suffix indicates the TensorFlow-internal Keras build
Out[4]:
'2.2.4-tf'


1) Introduction to Computer Vision


Load Dataset

In [5]:
data = K.datasets.fashion_mnist
In [6]:
(train_images, train_labels), (val_images, val_labels) = data.load_data()


How do computers see images?

Each image is a 28x28 array carrying pixel-intensity values

In [7]:
train_images[0].shape
Out[7]:
(28, 28)
In [8]:
img_size = train_images[0].shape[0]
print(img_size)
28


In this demonstration, the images are low-resolution (28x28) and grayscale

What would the array look like for RGB images?

Normally, we would have to pre-process the images to standardize their resolution and shape.

NumPy - Transpose, Reshape, Resize
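As a hedged illustration (the blank array below is hypothetical, not part of the dataset), an RGB image simply adds a third axis of channels, and NumPy can rearrange it freely:

# A 28x28 RGB image is a (28, 28, 3) array rather than (28, 28)
rgb = np.zeros((28, 28, 3), dtype=np.uint8)  # hypothetical blank image
print(rgb.transpose(2, 0, 1).shape)          # (3, 28, 28) - channels-first layout
print(rgb.reshape(-1).shape)                 # (2352,)     - flattened to a vector
print(np.resize(rgb, (14, 14, 3)).shape)     # (14, 14, 3) - np.resize repeats or truncates data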


In [9]:
train_images.shape, train_labels.shape
Out[9]:
((60000, 28, 28), (60000,))
In [10]:
val_images.shape, val_labels.shape
Out[10]:
((10000, 28, 28), (10000,))
In [11]:
class_names = [
    'T-Shirt/Top',
    'Trousers', 
    'Pullover',
    'Dress', 
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle Boot'
]

num_classes = len(class_names)
In [12]:
horiz, vert = 5, 3
area = vert * horiz
fig = plt.figure(figsize=(12,8))
for i in range(area):
    ax = plt.subplot(vert,horiz,i+1)
    ax.set_title(class_names[train_labels[i]])
    ax.imshow(train_images[i]) # optionally pass cmap=plt.cm.binary for grayscale


Rescale Arrays (Image arrays have large integer values)

Pixel values do not follow any particular distribution, but they always fall between 0 and 255, so dividing by 255 rescales them to the range 0-1.

In [13]:
train_images = train_images / 255.0
val_images = val_images / 255.0


2) Scikit-Learn


Reshape Images (Flatten)

In [14]:
reshaped_train = train_images[:2500].reshape(2500, img_size*img_size)
reshaped_val = val_images[:2500].reshape(2500, img_size*img_size)
In [15]:
reshaped_train.shape, reshaped_val.shape
Out[15]:
((2500, 784), (2500, 784))

Logistic Regression

In [16]:
from sklearn.linear_model import LogisticRegression
In [17]:
log_reg = LogisticRegression()
In [18]:
log_reg.fit(reshaped_train, train_labels[:2500])
C:\Users\cheon\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
C:\Users\cheon\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.
  "this warning.", FutureWarning)
Out[18]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)
In [19]:
log_reg.score(reshaped_val, val_labels[:2500])
Out[19]:
0.8288


3) Artificial Neural Networks


Let's Build a Neural Network with Keras

In [20]:
model = K.Sequential([
    K.layers.Flatten(input_shape=(img_size, img_size)), # Flattens 28 x 28 Image into 784
    K.layers.Dense(32, activation='relu'),
    K.layers.Dense(16, activation='relu'),
    K.layers.Dense(num_classes, activation='softmax')
])
In [21]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
In [22]:
model.fit(train_images, train_labels, epochs=5, batch_size=32)
Epoch 1/5
60000/60000 [==============================] - 2s 41us/sample - loss: 0.5660 - accuracy: 0.8032
Epoch 2/5
60000/60000 [==============================] - 2s 37us/sample - loss: 0.4097 - accuracy: 0.8548
Epoch 3/5
60000/60000 [==============================] - 2s 38us/sample - loss: 0.3752 - accuracy: 0.8650
Epoch 4/5
60000/60000 [==============================] - 2s 37us/sample - loss: 0.3552 - accuracy: 0.8734
Epoch 5/5
60000/60000 [==============================] - 2s 37us/sample - loss: 0.3376 - accuracy: 0.8770
Out[22]:
<tensorflow.python.keras.callbacks.History at 0x152395857b8>
In [23]:
model.evaluate(val_images, val_labels) # ADAM
10000/10000 [==============================] - 0s 27us/sample - loss: 0.4132 - accuracy: 0.8464
Out[23]:
[0.41318217957019804, 0.8464]
In [24]:
model.predict([[val_images[0]]])
Out[24]:
array([[2.8331954e-07, 1.2984191e-08, 2.6422391e-05, 5.8322503e-06,
        1.4101613e-06, 2.7249558e-02, 2.9521530e-06, 5.5209454e-02,
        4.3936077e-04, 9.1706467e-01]], dtype=float32)
In [25]:
fig = plt.figure(figsize=(15,7))
for i in range(10):
    ax = plt.subplot(2,5,i+1)
    ax.imshow(val_images[i], cmap=plt.cm.binary)
    ax.set_title('Actual: {}'.format(class_names[val_labels[i]]))
    ax.set_xlabel('Predicted: {}'.format(class_names[np.argmax(model.predict([[val_images[i]]]))]))


4) Convolutional Neural Networks


The Keras Conv2D layer takes input of shape (B, Sv, Sh, C), where C is the number of channels (3 for RGB, 1 for grayscale)

PyTorch's convolutional layer, in contrast, takes (B, C, Sv, Sh)
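If you ever need to convert between the two layouts, a single transpose does it (a sketch with a hypothetical dummy batch):

batch_keras = np.zeros((4, 28, 28, 1))           # (B, Sv, Sh, C) - Keras ordering
batch_torch = batch_keras.transpose(0, 3, 1, 2)  # (B, C, Sv, Sh) - PyTorch ordering
print(batch_torch.shape)                         # (4, 1, 28, 28)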

In [26]:
conv_train_images = train_images.reshape(train_images.shape[0], img_size, img_size, 1)
conv_val_images = val_images.reshape(val_images.shape[0], img_size, img_size, 1)
In [27]:
conv_train_images.shape, conv_val_images.shape
Out[27]:
((60000, 28, 28, 1), (10000, 28, 28, 1))
In [28]:
conv_model = K.Sequential([
    K.layers.Conv2D(8, (3,3), padding='same', activation='relu'),
    K.layers.MaxPooling2D(pool_size=(2,2)),
    K.layers.Conv2D(8, (3,3), padding='same', activation='relu'),
    K.layers.MaxPooling2D(pool_size=(2,2)),
    K.layers.Flatten(),
    K.layers.Dense(16, activation='relu'),
    K.layers.Dense(8, activation='relu'),
    K.layers.Dense(num_classes, activation='softmax')
])
In [29]:
conv_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
In [30]:
conv_model.fit(conv_train_images, train_labels, epochs=5)
Epoch 1/5
60000/60000 [==============================] - 13s 225us/sample - loss: 0.7430 - accuracy: 0.7236
Epoch 2/5
60000/60000 [==============================] - 14s 225us/sample - loss: 0.5175 - accuracy: 0.8067
Epoch 3/5
60000/60000 [==============================] - 14s 226us/sample - loss: 0.4666 - accuracy: 0.8296
Epoch 4/5
60000/60000 [==============================] - 14s 227us/sample - loss: 0.4310 - accuracy: 0.8443
Epoch 5/5
60000/60000 [==============================] - 12s 207us/sample - loss: 0.4054 - accuracy: 0.8542
Out[30]:
<tensorflow.python.keras.callbacks.History at 0x15239aff358>
In [31]:
conv_model.evaluate(conv_val_images, val_labels)
10000/10000 [==============================] - 1s 110us/sample - loss: 0.4280 - accuracy: 0.8457
Out[31]:
[0.4280245749235153, 0.8457]
In [32]:
fig = plt.figure(figsize=(15,7))
for i in range(10):
    ax = plt.subplot(2,5,i+1)
    ax.imshow(val_images[i], cmap=plt.cm.binary)
    ax.set_title('Actual: {}'.format(class_names[val_labels[i]]))
    ax.set_xlabel('Predicted: {}'.format(class_names[np.argmax(conv_model.predict([[conv_val_images[i]]]))]))


Compare the Models

Artificial Neural Network

In [33]:
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 32)                25120     
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                170       
=================================================================
Total params: 25,818
Trainable params: 25,818
Non-trainable params: 0
_________________________________________________________________

Convolutional Neural Network

In [34]:
conv_model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              multiple                  80        
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple                  0         
_________________________________________________________________
conv2d_1 (Conv2D)            multiple                  584       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 multiple                  0         
_________________________________________________________________
flatten_1 (Flatten)          multiple                  0         
_________________________________________________________________
dense_3 (Dense)              multiple                  6288      
_________________________________________________________________
dense_4 (Dense)              multiple                  136       
_________________________________________________________________
dense_5 (Dense)              multiple                  90        
=================================================================
Total params: 7,178
Trainable params: 7,178
Non-trainable params: 0
_________________________________________________________________

Convolutional layers have fewer trainable parameters and are comparatively less expensive than dense or recurrent layers
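As a quick sanity check on the two summaries above: a Conv2D layer has kernel_height * kernel_width * input_channels * filters weights plus one bias per filter, and a Dense layer has inputs * units weights plus one bias per unit:

print(3*3*1*8 + 8)    # conv2d:   3x3 kernel, 1 channel in, 8 filters  -> 80
print(3*3*8*8 + 8)    # conv2d_1: 3x3 kernel, 8 channels in, 8 filters -> 584
print(28*28*32 + 32)  # dense (ANN): 784 inputs x 32 units + biases    -> 25120
print(7*7*8*16 + 16)  # dense_3 (CNN): 392 inputs x 16 units + biases  -> 6288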

Especially on GPUs, where convolutional computations are parallelized across large memory bandwidths, convolutional neural networks are very effective and efficient

Later layers cannot be calculated before earlier layers finish, so across layers there is not much difference between CPU and GPU

Within a layer, however, each node can be calculated simultaneously, so GPUs show their advantage in parallelized, high-bandwidth computations, especially for convolutional layers, where the same set of (3x3) parameters is multiplied against every (3x3) patch of pixels.


5) Exercise with CIFAR10 Dataset


In [35]:
from tensorflow.keras.datasets import cifar10
In [36]:
data = cifar10.load_data() # ~170 Megabytes of 60,000 Images in 32 x 32 x 3
In [37]:
(X_train, Y_train), (X_val, Y_val) = data
In [38]:
X_train.shape, Y_train.shape, X_val.shape, Y_val.shape
Out[38]:
((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))


In [39]:
img_size = X_train.shape[1]
num_channels = X_train.shape[3]
In [40]:
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog', 
    'frog',
    'horse',
    'ship',
    'truck'
]

num_classes = len(class_names)
In [41]:
fig = plt.figure(figsize=(15,8))
for i, image in enumerate(X_train[:10]):
    ax = plt.subplot(2, 5, i+1)
    ax.imshow(image)
    ax.set_title('Actual: {}'.format(class_names[Y_train[i][0]]))


Scale Pixel Values to 0-1

In [42]:
X_train = X_train / 255.
X_val = X_val / 255.
In [43]:
X_train.shape
Out[43]:
(50000, 32, 32, 3)

Build Model

In [44]:
conv_model = K.Sequential([
    K.layers.Conv2D(32, (3,3), padding='same', activation='relu', input_shape=(32,32,3,)),
    K.layers.MaxPooling2D(pool_size=(2,2)),
    K.layers.Conv2D(16, (3,3), padding='same', activation='relu'),
    K.layers.MaxPooling2D(pool_size=(2,2)),
    K.layers.Conv2D(8, (3,3), padding='same', activation='relu'),
    K.layers.MaxPooling2D(pool_size=(2,2)),
    K.layers.Flatten(),
    K.layers.Dense(32, activation='relu'),
    K.layers.Dense(num_classes, activation='softmax')
])
In [45]:
conv_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Train Model

In [46]:
conv_model.fit(X_train, Y_train, epochs=2, batch_size=32)
Epoch 1/2
50000/50000 [==============================] - 30s 608us/sample - loss: 1.7030 - accuracy: 0.3738
Epoch 2/2
50000/50000 [==============================] - 30s 607us/sample - loss: 1.4064 - accuracy: 0.4897
Out[46]:
<tensorflow.python.keras.callbacks.History at 0x1523be95128>


The good thing about working in a Jupyter Notebook is that you can continue training the model in the next cell, without having to save and reload it

In [47]:
# Epoch 3
conv_model.fit(X_train, Y_train, epochs=1, batch_size=32, verbose=0) # You can play around with the different arguments 
# Verbose 0 means Silent (Nothing printed out)
Out[47]:
<tensorflow.python.keras.callbacks.History at 0x1525f949198>
In [48]:
# Epoch 4
conv_model.fit(X_train, Y_train, epochs=1, batch_size=32, verbose=2) # You can play around with the different arguments 
# Verbose 2 means no Progress Bar (Default is Verbose 1, with Progress Bar)
50000/50000 - 30s - loss: 1.1948 - accuracy: 0.5775
Out[48]:
<tensorflow.python.keras.callbacks.History at 0x1525f949978>
In [49]:
# Epoch 5
conv_model.fit(X_train, Y_train, epochs=1, batch_size=32) # You can play around with the different arguments
# No verbose argument here, so the default (verbose=1, with progress bar) is used
50000/50000 [==============================] - 30s 609us/sample - loss: 1.1297 - accuracy: 0.6014
Out[49]:
<tensorflow.python.keras.callbacks.History at 0x1525f949e80>


Evaluate Performance of Model

In [50]:
conv_model.evaluate(X_val, Y_val)
10000/10000 [==============================] - 2s 190us/sample - loss: 1.1030 - accuracy: 0.6059
Out[50]:
[1.102982096672058, 0.6059]
In [51]:
fig = plt.figure(figsize=(15,7))
for i in range(10):
    ax = plt.subplot(2,5,i+1)
    ax.imshow(X_val[i], cmap=plt.cm.binary)
    ax.set_title('Actual: {}'.format(class_names[Y_val[i][0]]))
    ax.set_xlabel('Predicted: {}'.format(class_names[np.argmax(conv_model.predict([[X_val[i]]]))]))


Saving / Loading the Model

Saving the Model

In [52]:
conv_model.save('conv_model.h5')

Deleting the Model

In [53]:
del conv_model
In [54]:
try:
    print(conv_model)
except NameError as err:  # the name no longer exists after `del`
    print('NameError:', err)
NameError: name 'conv_model' is not defined

Loading the Model from the saved file

In [55]:
conv_model = K.models.load_model('conv_model.h5')
In [56]:
conv_model.evaluate(X_val, Y_val)
10000/10000 [==============================] - 2s 189us/sample - loss: 1.1030 - accuracy: 0.6059
Out[56]:
[1.102982096672058, 0.6059]


Testing the Model with New Images

In [57]:
import os
from PIL import Image
In [58]:
root = './sources/test_images/'

full_images = []
filenames = [name for name in os.listdir(root)]

for filename in filenames:
    img = Image.open(root+filename)
    img.load()
    full_images.append(np.asarray(img, dtype='int32'))
In [59]:
fig = plt.figure(figsize=(15,8))
for i, image in enumerate(full_images):
    ax = plt.subplot(2, 5, i+1)
    ax.imshow(image)
    ax.set_title('Filename: {}'.format(filenames[i]))


Reload images as 32 x 32 x 3 Arrays

In [60]:
root = './sources/test_images/'

images = []
filenames = [name for name in os.listdir(root)]

for filename in filenames:
    img = Image.open(root+filename)
    img.load()
    img = img.resize(size=(img_size, img_size))
    images.append(np.asarray(img, dtype='int32'))
images = np.array(images)
In [61]:
fig = plt.figure(figsize=(15,8))
for i, image in enumerate(images):
    ax = plt.subplot(2, 5, i+1)
    ax.imshow(image)
    ax.set_title('Filename: {}'.format(filenames[i]))

Predictions

In [62]:
preds = np.argmax(conv_model.predict([images]), 1)
In [63]:
print(preds)
[0 3 6 1 7 7 5 5 9 9]
In [64]:
fig = plt.figure(figsize=(15,8))
for i, image in enumerate(full_images):
    ax = plt.subplot(2, 5, i+1)
    ax.imshow(image)
    ax.set_title('Filename: {}'.format(filenames[i]))
    ax.set_xlabel('Predicted: {}'.format(class_names[preds[i]]))


How to further improve?

Dropout Regularization and Batch Normalization
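A minimal sketch of how these two layers might slot into the CIFAR10 model above (the layer sizes are illustrative, not tuned):

regularized_model = K.Sequential([
    K.layers.Conv2D(32, (3,3), padding='same', activation='relu', input_shape=(32,32,3)),
    K.layers.BatchNormalization(),  # normalizes activations batch-by-batch
    K.layers.MaxPooling2D(pool_size=(2,2)),
    K.layers.Flatten(),
    K.layers.Dense(32, activation='relu'),
    K.layers.Dropout(0.5),          # randomly zeroes half the units during training
    K.layers.Dense(num_classes, activation='softmax')
])
regularized_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])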

Data Augmentation (a sketch follows the list below)

  • Color Augmentation

  • Cropping

  • Rotating

  • Flipping
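A sketch of these augmentations with Keras' built-in ImageDataGenerator (the parameter values are illustrative):

datagen = K.preprocessing.image.ImageDataGenerator(
    rotation_range=15,        # rotating
    width_shift_range=0.1,    # random shifts approximate cropping
    height_shift_range=0.1,
    horizontal_flip=True,     # flipping
    channel_shift_range=0.1,  # a simple form of color augmentation
)
# conv_model.fit_generator(datagen.flow(X_train, Y_train, batch_size=32), epochs=5)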


You can even load models pre-trained on large datasets and retrain only the last few layers
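For instance, here is a hedged sketch using MobileNetV2 from K.applications (this assumes the ImageNet weights can be downloaded; any other base model would work similarly):

base = K.applications.MobileNetV2(include_top=False, weights='imagenet',
                                  input_shape=(32, 32, 3), pooling='avg')
base.trainable = False  # freeze the pre-trained feature extractor
transfer_model = K.Sequential([
    base,
    K.layers.Dense(num_classes, activation='softmax')  # only this new head is trained
])
transfer_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])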