How to use OpenCV Videocapture With Saved Keras Model?

I made a simple cat vs dog detector using Keras and Tensorflow. Then I saved the model in .h5 format and the training in checkpoints. Now, I want to load the training and saved model and run it live with OpenCV Videocapture using a camera. How can I do this?

Here is the code I used for training:

import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as display
from PIL import Image, ImageSequence
import os
import pathlib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import datetime

tf.compat.v1.enable_eager_execution()

epochs = 200
steps_per_epoch = 10
batch_size = 20
IMG_HEIGHT = 200
IMG_WIDTH = 200

train_dir = "Data/Train"
test_dir = "Data/Val"

train_image_generator = ImageDataGenerator(rescale=1. / 255)

test_image_generator = ImageDataGenerator(rescale=1. / 255)

train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='sparse')

test_data_gen = test_image_generator.flow_from_directory(batch_size=batch_size,
                                                         directory=test_dir,
                                                         shuffle=True,
                                                         target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                         class_mode='sparse')




model = Sequential([
    Conv2D(265, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH ,3)),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    keras.layers.Dense(256, activation="relu"),
    keras.layers.Dense(144, activation="relu"),
    keras.layers.Dense(80, activation="softmax")
])

model.compile(optimizer='adam',
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])

model.summary()
tf.keras.utils.plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True, rankdir='TB')
checkpoint_path = "training/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

os.system("rm -r logs")

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

history = model.fit(train_data_gen,steps_per_epoch=steps_per_epoch,epochs=epochs,validation_data=test_data_gen,validation_steps=10,callbacks=[cp_callback, tensorboard_callback])
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.save('model.h5', include_optimizer=True)

test_loss, test_acc = model.evaluate(test_data_gen)
print("Tested Acc: ", test_acc)
print("Tested Acc: ", test_acc*100, "%")

I am using the COCO dataset to do image classification. I want to use a standard USB camera with OpenCV to do Videocapture and stream it live to the trained and exported model for prediction. How can I do this?

Solution

You can do something like this:

import cv2
import numpy as np
from PIL import Image
from keras import models
import os
import tensorflow as tf

checkpoint_path = "training_gender/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

model = models.load_model('gender.h5')
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
video = cv2.VideoCapture(0)

while True:
        _, frame = video.read()

        #Convert the captured frame into RGB
        im = Image.fromarray(frame, 'RGB')

        #Resizing into dimensions you used while training
        im = im.resize((109,82))
        img_array = np.array(im)

        #Expand dimensions to match the 4D Tensor shape.
        img_array = np.expand_dims(img_array, axis=0)

        #Calling the predict function using keras
        prediction = model.predict(img_array)#[0][0]
        print(prediction)
        #Customize this part to your liking...
        if(prediction == 1 or prediction == 0):
            print("No Human")
        elif(prediction < 0.5 and prediction != 0):
            print("Female")
        elif(prediction > 0.5 and prediction != 1):
            print("Male")

        cv2.imshow("Prediction", frame)
        key=cv2.waitKey(1)
        if key == ord('q'):
                break
video.release()
cv2.destroyAllWindows()