Search code examples
pythonimagemachine-learningkeras

Concatenate original train dataset of images with augmented train dataset


I have written the following code to load the train and test data. I have augmented the training dataset, but now I would like to concatenate the original training dataset with the augmented one. How can I do that?

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Generator for training data: rescales pixel values to [0, 1] and applies
# small random augmentations (rotation, zoom, shifts); validation_split
# reserves 20% of the images in train_dir for validation.
train_datagen = ImageDataGenerator(
        rescale=1./255, 
        rotation_range=5,
        zoom_range = 0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
        validation_split=0.2
)

# Generator for test data: only rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

# Directory layout assumed: one sub-folder per class under each dir.
train_dir = 'train_separated'
test_dir = 'test_separated'
batch_size = 128
img_height = 100
img_width = 100
num_classes = 10

# load train and test data
# 'training' subset = the 80% of train_dir not held out by validation_split.
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical', 
    subset='training')

# after that I have train_data that was augmented, but how to concatenate the new augmented data with the original train data?

# 'validation' subset = the 20% hold-out; note it goes through the SAME
# augmenting generator, so validation images are also augmented here.
val_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical', 
    subset='validation')

# Test data: rescaled only, one batch stream over test_dir.
test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

I expect my training data to contain both the original images and the augmented ones.


Solution

  • I discovered a way. Here I give you an example:

    import tensorflow as tf
    train_dir = "images/"
    img_height = 32
    img_width = 32
    batch_size = 16
    
    # Build two generators over the SAME directory: one that only rescales
    # (the original images) and one that also augments.
    train_data = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255
    )
    
    train_generator = train_data.flow_from_directory(
        train_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size, 
        class_mode='binary',
        shuffle=True
    )
    
    aug_train_data = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255, 
        rotation_range=5,
        zoom_range = 0.1,
        width_shift_range=0.1,
        height_shift_range=0.1
    )
    
    aug_train_generator = aug_train_data.flow_from_directory(
        train_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=True
    )
    
    # Wrap each Keras iterator in a tf.data.Dataset.
    # IMPORTANT: flow_from_directory iterators loop FOREVER, so each wrapped
    # dataset must be truncated with .take() to exactly one pass over the
    # directory. Without .take(), concatenate() would never reach the second
    # (augmented) dataset, because the first one never ends — training would
    # silently see only un-augmented images.
    train_ds = tf.data.Dataset.from_generator(
        lambda: train_generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, img_height, img_width, 3], [None,])  # (batch of RGB images, batch of labels)
    ).take(len(train_generator))
    
    aug_train_ds = tf.data.Dataset.from_generator(
        lambda: aug_train_generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, img_height, img_width, 3], [None,])
    ).take(len(aug_train_generator))
    
    # concatenate: one epoch = one original pass followed by one augmented pass
    train_ds = train_ds.concatenate(aug_train_ds)
    
    # shuffle at the batch level (small buffer mixes nearby batches), then
    # repeat() so the now-finite dataset restarts for every epoch of fit()
    train_ds = train_ds.shuffle(buffer_size=5).repeat()
    
    # classification example (binary: single sigmoid output)
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, GlobalMaxPool2D
    
    model = Sequential([
        Conv2D(4, (3,3), activation='relu', input_shape=(img_height, img_width,3)),
        MaxPooling2D((2,2)),
        Conv2D(8, (3,3), activation='relu'),
        MaxPooling2D((2,2)),
        Conv2D(16, (3,3), activation='relu'),
        GlobalMaxPool2D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    
    # One epoch covers both passes; this is mandatory because the repeated
    # dataset is infinite, otherwise fit() would keep looping
    steps_per_epoch = len(train_generator) + len(aug_train_generator)
    
    model.fit(train_ds, steps_per_epoch=steps_per_epoch, epochs=5)