I'm using the following generator:
datagen = ImageDataGenerator(
    fill_mode='nearest',
    cval=0,
    rescale=1. / 255,
    rotation_range=90,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.5,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.5,
)
train_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=train_path,
    x_col="id",
    y_col=classes,
    subset="training",
    batch_size=8,
    seed=123,
    shuffle=True,
    class_mode="raw",  # "other" is the deprecated name for "raw"
    target_size=(64, 64))
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
valid_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=train_path,
    x_col="id",
    y_col=classes,
    subset="validation",
    batch_size=8,
    seed=123,
    shuffle=True,
    class_mode="raw",
    target_size=(64, 64))
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
The problem is that the validation data is also being augmented, which is not something you want during training. How do I avoid this? I don't have separate directories for training and validation; I want to use a single dataframe to train the network. Any suggestions?
The solution my friend found was to use a second generator for validation that only rescales (no augmentation), with the same validation_split and no shuffle, so both generators split the dataframe into the same training and validation subsets.
datagen = ImageDataGenerator(
    # featurewise_center=True,
    # featurewise_std_normalization=True,
    rescale=1. / 255,
    rotation_range=90,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.5,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.15,
)
valid_datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.15)
and then you can define the two generators as follows:
train_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=train_path,
    x_col="id",
    y_col=classes,
    subset="training",
    batch_size=64,
    seed=123,
    shuffle=False,
    class_mode="raw",
    target_size=(224, 224))
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=train_path,
    x_col="id",
    y_col=classes,
    subset="validation",
    batch_size=64,
    seed=123,
    shuffle=False,
    class_mode="raw",
    target_size=(224, 224))
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
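For completeness, here's how you'd plug both generators into training. This is only a minimal sketch: the model variable and its compile settings are assumptions, not part of the original code. With tf.keras (TF 2.x), fit accepts generators directly; with the older standalone Keras you'd call fit_generator with the same arguments instead.

# Sketch only: assumes `model` is an already-built and compiled Keras model,
# e.g. model.compile(optimizer='adam', loss='binary_crossentropy').
model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,   # validation batches are only rescaled, not augmented
    validation_steps=STEP_SIZE_VALID,
    epochs=10)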