I am using the Inception v4 model to train a classifier on three classes, A, B and C, each having roughly 900 images in the training dataset and 80 images in the validation set. I ran my training code for 200 epochs with a batch size of 8, and I was getting an average validation accuracy of more than 99% with a very low loss:
Epoch 199/200
303/303 [==============================] - 53s 174ms/step - loss: 0.0026 - accuracy: 0.9996 - val_loss: 5.1226e-04 - val_accuracy: 1.0000
Epoch 200/200
303/303 [==============================] - 53s 176ms/step - loss: 0.0019 - accuracy: 1.0000 - val_loss: 0.1079 - val_accuracy: 0.9750
When I run my test code on the images in directory A of the validation set, it predicts 80% of the images as class A, 20% as class C, and none at all as class B. The same happens with class C (80% as C, 20% as A), and on directory B every image is predicted as class A or C. In all three test cases, not a single image is classified as class B by the test program, despite the high validation accuracy and despite using the exact same directory that was used for validation at training time (the latter also leads me to believe that overfitting is not the primary cause).
This was the output of the test program on directory B:
25/25 [==============================] - 8s 186ms/step - loss: 0.0212 - accuracy: 0.9963
['loss', 'accuracy']
[0.02124088630080223, 0.9963099360466003]
Testing images located in val/B/
[[6.2504888e-01 8.8258091e-08 3.7495103e-01]]
A:62.5%
[[8.8602149e-01 1.3459101e-05 1.1396510e-01]]
A:88.6%
[[4.7189465e-01 4.0863368e-05 5.2806443e-01]]
C:52.81%
[[1.0370950e-01 2.7608112e-07 8.9629024e-01]]
C:89.63%
[[7.1212035e-01 3.3269991e-06 2.8787634e-01]]
A:71.21%
and so on.
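For reference, one way to see this breakdown for all three classes at once is a confusion matrix over the whole validation set. A minimal sketch, assuming scikit-learn is installed and the trained model is already loaded as model; shuffle=False keeps the predictions aligned with the generator's labels:

import numpy as np
from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# shuffle=False fixes the file order, so row i of the predictions
# corresponds to eval_gen.classes[i]
eval_gen = ImageDataGenerator(rescale=1/255).flow_from_directory(
    "val/", target_size=(299, 299), class_mode="categorical", shuffle=False)

probs = model.predict(eval_gen)  # one softmax row per validation image
print(confusion_matrix(eval_gen.classes, np.argmax(probs, axis=1)))

The rescale-only generator here mirrors the per-image test path rather than the augmented generator used at training time.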
I even tried dividing the result of img = np.expand_dims(test_image, axis=0) by 255, as suggested in an answer to another question I had asked elsewhere. That fixed the problem in that case, but not here.
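To make the scaling question concrete: with the generators configured as in the training code below, the generator path divides each sample by its own standard deviation on top of the 1/255 rescale (which is roughly what samplewise_std_normalization does), while my per-image test path only rescales. A tiny check, with a random array standing in for a real image:

import numpy as np

x = np.random.uniform(0, 255, size=(1, 299, 299, 3)).astype("float32")  # stand-in image

generator_path = x / 255.0
generator_path = generator_path / (np.std(generator_path, keepdims=True) + 1e-6)  # per-sample std division

test_path = x / 255.0  # what my test code feeds the model

print(np.allclose(generator_path, test_path))  # False: two different input scales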
Here is my training code:
def create_inception_v4(nb_classes, load_weights, checkpoint_path):
    init = Input((299, 299, 3))
    x = inception_stem(init)

    # 4 x Inception-A
    for i in range(4):
        x = inception_A(x)

    # Reduction-A
    x = reduction_A(x)

    # 7 x Inception-B
    for i in range(7):
        x = inception_B(x)

    # Reduction-B
    x = reduction_B(x)

    # 3 x Inception-C
    for i in range(3):
        x = inception_C(x)

    # Average pooling
    x = AveragePooling2D((8, 8))(x)

    # Dropout - use 0.2, as mentioned in the official paper
    x = Dropout(0.2)(x)
    x = Flatten()(x)

    # Output
    out = Dense(nb_classes, activation='softmax')(x)

    model = Model(init, out, name='Inception-v4')

    if load_weights:
        weights = checkpoint_path
        model.load_weights(weights, by_name=True)
        print("Model weights loaded.")

    return model
def train(args, check, checkpoint_path, network_name="inceptionv4"):
    n_gpus = int(args['gpus'])
    sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

    datagen = ImageDataGenerator(rescale=1/255,
                                 rotation_range=40,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 shear_range=0.1,
                                 zoom_range=0.1,
                                 horizontal_flip=True,
                                 fill_mode='nearest',
                                 samplewise_std_normalization=True)
    val_datagen = ImageDataGenerator(rescale=1/255)

    batch_size = int(args["batch_size"])
    train_generator = datagen.flow_from_directory(train_dir, target_size=(299, 299),
                                                  class_mode="categorical", batch_size=batch_size)
    val_gen = datagen.flow_from_directory(val_dir, target_size=(299, 299),
                                          class_mode="categorical", batch_size=batch_size)

    mc = keras.callbacks.ModelCheckpoint(f"{network_name}_checkpoints/{network_name}.h5",
                                         save_weights_only=True, save_best_only=True)
    tensorboard = TensorBoard(log_dir="{}/{}".format(args["log_dir"], time()))
    validation_steps = 10

    model = create_inception_v4(int(args["num_classes"]), check, checkpoint_path)
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.SGD(learning_rate=float(args['learning_rate']),
                                                    decay=1e-6, momentum=0.9, nesterov=True),
                  metrics=["accuracy"])

    counter = Counter(train_generator.classes)
    max_val = float(max(counter.values()))
    class_weights = {class_id: max_val / num_images for class_id, num_images in counter.items()}

    hist = model.fit(train_generator, epochs=num_epochs, verbose=True,
                     validation_data=val_gen, validation_steps=validation_steps,
                     callbacks=[mc, tensorboard], class_weight=class_weights)
    model.save(f"checkpoints/{network_name}_{num_epochs}epochs.h5")
And here is my test code:
def test_model(test_dir, num_epochs, class_names, network_name="inceptionv4"):
    model = load_model(f'checkpoints/{network_name}_{num_epochs}epochs.h5')

    datagen = ImageDataGenerator(rescale=1/255,
                                 rotation_range=40,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 shear_range=0.1,
                                 zoom_range=0.1,
                                 horizontal_flip=True,
                                 fill_mode='nearest',
                                 samplewise_std_normalization=True)
    val_datagen = ImageDataGenerator(rescale=1/255)

    val_dir = "val/"
    val_gen = datagen.flow_from_directory(val_dir, target_size=(299, 299), class_mode="categorical")
    test_accuracy = model.evaluate(val_gen, steps=25)
    print(model.metrics_names)
    print(test_accuracy)

    img_width, img_height = 299, 299
    print(f"Testing images located in {test_dir}")
    counter = 0
    results_dict = {}
    start_time = time.time()

    for filename_img in os.listdir(test_dir):
        counter += 1
        filename = os.path.join(test_dir, filename_img)
        img = image.load_img(filename, target_size=(img_width, img_height))
        test_image = image.img_to_array(img)
        img = np.expand_dims(test_image, axis=0) / 255
        classes = model.predict(img, batch_size=10)
        print(classes)
        predicted_class = class_names[np.argmax(classes)]
        if predicted_class not in results_dict:
            results_dict[predicted_class] = 1
        else:
            results_dict[predicted_class] += 1
        print(f"{predicted_class}:{round(np.amax(classes)*100,2)}%")
        if counter % 100 == 0:
            print(f"{counter} files processed!")

    time_taken = time.time() - start_time
    time_taken = round(time_taken, 2)
    print(f"{counter} images processed in {time_taken} seconds, "
          f"at a rate of {round(counter/time_taken,2)} images per second.")

    for predicted_class in results_dict:
        print(f"{predicted_class} = {results_dict[predicted_class]} predictions")
What am I doing wrong?
Edit 1 - I tried to account for imbalanced classes by adding the class_weight parameter, as shown in the edited training code above. I am still not able to predict class B. I even tried using val_datagen instead of datagen, but that led to even worse results.
Edit 2 - I copied my entire folder elsewhere, deleted class B, and retained classes A and C. I trained the model, once again got a very high training accuracy, and now my test program can only predict class C and not class A. I have a feeling that I have made a really silly mistake in my test.py code.
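One sanity check that is easy to overlook with this kind of missing-class behaviour: flow_from_directory assigns class indices alphabetically by folder name, so the class_names list passed to test_model() has to match that ordering. A minimal sketch to verify the mapping, using the generator's class_indices dictionary:

from tensorflow.keras.preprocessing.image import ImageDataGenerator

val_gen = ImageDataGenerator(rescale=1/255).flow_from_directory(
    "val/", target_size=(299, 299), class_mode="categorical")
print(val_gen.class_indices)   # e.g. {'A': 0, 'B': 1, 'C': 2}

# Order class_names so that class_names[i] names class index i
class_names = sorted(val_gen.class_indices, key=val_gen.class_indices.get)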
This was a very frustrating error. I realised that I was getting a high validation accuracy from model.evaluate() on an entire directory, but not from model.predict() on individual images. This was because the image augmentation techniques applied during training were also applied to the validation data, but not to the individual images fed as input to the model.
In this case, I realised that samplewise_std_normalization wasn't being applied to the test images. So I used the generator's standardising function, as inspired by this answer: test_image = datagen.standardize(test_image), and now my model works perfectly. The full test.py code can be seen below:
def test_model(test_dir, num_epochs, class_names, network_name="inceptionv4"):
    model = load_model(f'checkpoints/{network_name}_{num_epochs}epochs.h5')

    datagen = ImageDataGenerator(rescale=1/255,
                                 rotation_range=40,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 shear_range=0.1,
                                 zoom_range=0.1,
                                 horizontal_flip=True,
                                 fill_mode='nearest',
                                 samplewise_std_normalization=True)
    val_datagen = ImageDataGenerator(rescale=1/255)

    val_dir = "val/"
    val_gen = datagen.flow_from_directory(val_dir, target_size=(299, 299), class_mode="categorical")
    test_accuracy = model.evaluate(val_gen, steps=25)
    print(model.metrics_names)
    print(test_accuracy)

    img_width, img_height = 299, 299
    print(f"Testing images located in {test_dir}")
    counter = 0
    results_dict = {}
    start_time = time.time()

    for filename_img in os.listdir(test_dir):
        counter += 1
        filename = os.path.join(test_dir, filename_img)
        img = image.load_img(filename, target_size=(img_width, img_height))
        test_image = image.img_to_array(img)
        test_image = np.expand_dims(test_image, axis=0)
        # Don't divide by 255; that is taken care of by the standardize function
        test_image = datagen.standardize(test_image)
        classes = model.predict(test_image, batch_size=10)
        print(classes)
        predicted_class = class_names[np.argmax(classes)]
        if predicted_class not in results_dict:
            results_dict[predicted_class] = 1
        else:
            results_dict[predicted_class] += 1
        print(f"{predicted_class}:{round(np.amax(classes)*100,2)}%")
        if counter % 100 == 0:
            print(f"{counter} files processed!")

    time_taken = time.time() - start_time
    time_taken = round(time_taken, 2)
    print(f"{counter} images processed in {time_taken} seconds, "
          f"at a rate of {round(counter/time_taken,2)} images per second.")

    for predicted_class in results_dict:
        print(f"{predicted_class} = {results_dict[predicted_class]} predictions")