I have written the following code for loading the train and test data. I have augmented the train data set, but I would like to concatenate the original train data set with the augmented one. How can I do it?
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=5,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)
train_dir = 'train_separated'
test_dir = 'test_separated'
batch_size = 128
img_height = 100
img_width = 100
num_classes = 10
# load train and test data
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training')
# after this I have the augmented train_data, but how do I concatenate the newly augmented data with the original train data?
val_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation')
test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')
I expect my train data to contain both the augmented train data and the original data.
I found a way to do it. Here is an example:
import tensorflow as tf
train_dir = "images/"
img_height = 32
img_width = 32
batch_size = 16
# build the generators
train_data = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)
train_generator = train_data.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True
)
aug_train_data = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=5,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)
aug_train_generator = aug_train_data.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True
)
# now let's combine these as tf.data datasets
train_ds = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, img_height, img_width, 3], [None,])  # (batch, H, W, 3) images, (batch,) binary labels
)
aug_train_ds = tf.data.Dataset.from_generator(
    lambda: aug_train_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, img_height, img_width, 3], [None,])
)
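# Optional sketch: the two datasets could instead be interleaved batch by batch with
# tf.data's sampling API, so each epoch mixes original and augmented batches rather
# than seeing one stream after the other. The name mixed_train_ds is purely
# illustrative; you could pass it to model.fit in place of train_ds below.
mixed_train_ds = tf.data.experimental.sample_from_datasets(
    [train_ds, aug_train_ds], weights=[0.5, 0.5]
)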
# concatenate the two datasets
train_ds = train_ds.concatenate(aug_train_ds)
# shuffle (this shuffles at the batch level, using a small buffer of 5 batches)
train_ds = train_ds.shuffle(buffer_size=5)
#classification example
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, GlobalMaxPool2D
model = Sequential([
    Conv2D(4, (3,3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D((2,2)),
    Conv2D(8, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Conv2D(16, (3,3), activation='relu'),
    GlobalMaxPool2D(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
steps_per_epoch = len(train_generator) + len(aug_train_generator)  # this is mandatory, otherwise the generators (which loop forever) will keep feeding batches and the epoch will never end
model.fit(train_ds, steps_per_epoch=steps_per_epoch, epochs=5)
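A note for newer TensorFlow versions: the output_types/output_shapes arguments of from_generator are deprecated in favour of a single output_signature argument. If you run into deprecation warnings, an equivalent construction (a sketch assuming float32 images and binary labels, matching the shapes above) would look like this:
train_ds = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_signature=(
        tf.TensorSpec(shape=(None, img_height, img_width, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32)
    )
)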