Search code examples
tensorflowmodeldataset

Model not learning with tf.keras.preprocessing.image_dataset_from_directory


I tried to make a simple model to recognize images of animals (3 classes), with the function tf.keras.preprocessing.image_dataset_from_directory, when i fit the model with the train and validation dataset, it seems to work, but on evaluate and predict, it find always the same class, I dont know why, here is my code :

import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
import os

i = 0
def fast_scandir(dirname):
    subfolders= [f.path for f in os.scandir(dirname) if f.is_dir()]
    for dirname in list(subfolders):
        subfolders.extend(fast_scandir(dirname))
    return subfolders
classnames = fast_scandir("/home/someone/Documents/machinelearning/path/testimage")
for name in classnames:
    classnames[i] = classnames[i].replace("/home/someone/Documents/machinelearning/path/images/","")
    i+=1
i=0
classesnum = len(classnames)
#print(classesnum)
img_height = 256
img_width = 256
batch_size = 16
IMAGE_SIZE = 256


data_dir = "/home/someone/Documents/machinelearning/path/testimage"
test_dir = "/home/someone/Documents/machinelearning/path/testimage"

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  label_mode="categorical",
  seed=123,
  shuffle=False,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  color_mode='rgb')
print(train_ds)
'''
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
  test_dir,
  label_mode="categorical",
  seed=123,
  shuffle=False,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  color_mode='rgb')
'''
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  label_mode="categorical",
  seed=123,
  shuffle=False,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  color_mode='rgb')


'''
#####debug######
for images,labels in train_ds.take(1):
    for i in range(len(labels)):
        img = plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(str(labels[i]))
        
        plt.show()
'''

#tf.data.experimental.cardinality(train_ds)
#print(val_ds)
#print(np.shape(val_ds))
#print(type(val_ds))

import matplotlib.pyplot as plt





pretrained_model= tf.keras.applications.ResNet50(include_top=False,
                   input_shape=(img_width,img_height,3),
                   pooling='avg',classes=classesnum,
                   weights='imagenet')
for layer in pretrained_model.layers:
        layer.trainable=False

from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import optimizers

model = Sequential()

model.add(Conv2D(32,(3,3),input_shape=(IMAGE_SIZE,IMAGE_SIZE,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(96,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(128,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(512,activation='relu'))
model.add(Dense(3,activation='softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(learning_rate=3e-4),
              metrics=['accuracy'])

history = model.fit(train_ds, validation_data=val_ds, epochs=10)

fig1 = plt.gcf()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.axis(ymin=0.4,ymax=1)
plt.grid()
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend(['train', 'validation'])
plt.show()
model.save("/home/someone/Documents/machinelearning/path/model.h5")
'''
evaluation = model.evaluate(test_ds, return_dict=True)


for name, value in evaluation.items():
  print(f"{name}: {value:.4f}")
'''

I think its something that is set wrong like a bad number of classes somewhere..


Solution

  • It seems that setting shuffle=True in the dataset, resolved the issue, because somehow just a part of the dataset is used in my model.