TensorFlow ensemble learning: feeding multiple datasets of different image sizes into a Keras model

I have an image classification problem for which I'd like to combine different models I've trained.

For example, I have two models:

# Load the two previously trained models from disk; the suffixes (96 / 224)
# mirror the two image sizes used for the datasets further down.
mobilenet_v2_100_96    = tf.keras.models.load_model("saved_model_mobilenet_v2_100_96")
mobilenet_v2_100_224    = tf.keras.models.load_model("saved_model_mobilenet_v2_100_224")

I then freeze the layers of these models, collect the models in a list called "models", and create a new model that combines them like so:

# One input tensor and one output tensor per base model in `models`.
ensemble_visible = [model.input for model in models]
ensemble_outputs = [model.output for model in models]

# New head: concatenate the base-model outputs and classify on top of them.
merge = tf.keras.layers.concatenate(ensemble_outputs)
merge = tf.keras.layers.Dense(200, activation='relu')(merge)
# No activation on the last layer: the loss below is created with
# from_logits=True, so the head has to emit raw logits rather than
# sigmoid-squashed values.
output = tf.keras.layers.Dense(200)(merge)
# The ensemble's output is the new head, not the list of base-model outputs.
model = tf.keras.models.Model(inputs=ensemble_visible, outputs=output)

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
                metrics=['accuracy'])

As you can see I need two differently sized inputs in order to make it work.

I obtain my two datasets through the tf.keras.preprocessing.image_dataset_from_directory function.

Now I need a way to combine these two datasets. I made sure to pass the same seed value to both calls, so the images should be in the same order.

I've tried splitting the datasets with this tf_unzip (https://stackoverflow.com/a/68661507/20617581) function and combining them like this:

# Pair both image streams (keyed by name) with the labels from the 96x96 pipeline.
# NOTE(review): the dict keys presumably have to match the ensemble's input
# layer names -- confirm against model.summary().
model_inputs = tf.data.Dataset.zip(({"inputmobilenet_v2_100_96":train_x_96, "inputmobilenet_v2_100_224":train_x_224}, train_y_96))

However, when I run the model.fit function, the model does not get above 1% accuracy, which is far less than the original models achieve. If I run the same model using only one input, everything works as expected.

My complete code is as follows:

import tensorflow as tf
from utils.model import Model
from tqdm import tqdm
import numpy as np
import pandas as pd
import tensorflow_datasets as tfds

def tfdata_unzip(
    tfdata: tf.data.Dataset,
    *,
    recursive: bool=False,
    eager_numpy: bool=False,
    num_parallel_calls: int=tf.data.AUTOTUNE,
):
    """
    Split a zipped tf.data pipeline into one pipeline per column.

    Args:
        tfdata: the :py:class:`tf.data.Dataset` to split.

        recursive: when ``True``, every extracted column is
            passed through this function again, so nested
            tuples are split as well. Defaults to ``False``.

        eager_numpy: when ``True``, a pipeline whose elements
            are single tensors is materialised through
            ``as_numpy_iterator()`` into a Python list
            instead of being returned as a dataset. It is
            only consulted for such single-tensor pipelines.
            Defaults to ``False``.

        num_parallel_calls: forwarded to every ``map()``
            call made on a :py:class:`tf.data.Dataset`.

    Returns:
        For tuple elements, a Python list with one entry
        per column. For single-tensor elements, the dataset
        itself (or its materialised list when
        ``eager_numpy`` is set).

    Raises:
        ValueError: if ``element_spec`` is neither a
            :py:class:`tf.TensorSpec` nor a tuple.
    """
    spec = tfdata.element_spec

    # Leaf case: nothing left to split.
    if isinstance(spec, tf.TensorSpec):
        return list(tfdata.as_numpy_iterator()) if eager_numpy else tfdata

    if not isinstance(spec, tuple):
        raise ValueError(
            "Unknown tf.data.Dataset element_spec: " +
            str(spec)
        )

    def select_column(index):
        # A dataset that keeps only the index-th component of each element.
        return tfdata.map(
            lambda *cols: cols[index],
            deterministic=True,
            num_parallel_calls=num_parallel_calls,
        )

    def descend(column):
        # Split the extracted column further only when asked to.
        if not recursive:
            return column
        return tfdata_unzip(
            column,
            recursive=recursive,
            eager_numpy=eager_numpy,
            num_parallel_calls=num_parallel_calls,
        )

    return [descend(select_column(index)) for index in range(len(spec))]

print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

# Base models that make up the ensemble, in input order.
models = []

# mobilenet_v2_050_160    = tf.keras.models.load_model("saved_model_mobilenet_v2_050_160")
mobilenet_v2_100_96    = tf.keras.models.load_model("saved_model_mobilenet_v2_100_96")
mobilenet_v2_100_224    = tf.keras.models.load_model("saved_model_mobilenet_v2_100_224")

# models.append(mobilenet_v2_050_160)
models.append(mobilenet_v2_100_96)
models.append(mobilenet_v2_100_224)

for i, model in enumerate(models):
    for layer in model.layers:
        # Freeze the base model; only the new head below is trained.
        layer.trainable = False
        # Suffix every layer name with the model index (presumably so that
        # names stay unique once both models live in one graph). This writes
        # the private `_name` attribute.
        layer._name = layer.name + str(i)

    # NOTE(review): these also write private attributes; check model.summary()
    # to see which input names the combined model actually exposes.
    model.input._name = "input_" + str(i)
    model.input.type_spec._name = "input_" + str(i)
    # model.summary()


# One input tensor and one output tensor per base model.
ensemble_visible = [model.input for model in models]
ensemble_outputs = [model.output for model in models]



# New head: concatenate the base-model outputs and classify on top of them.
merge = tf.keras.layers.concatenate(ensemble_outputs)
merge = tf.keras.layers.Dense(200, activation='relu')(merge)
# No activation on the last layer: the loss below is created with
# from_logits=True, so the head has to emit raw logits rather than
# sigmoid-squashed values.
output = tf.keras.layers.Dense(200)(merge)
model = tf.keras.models.Model(inputs=ensemble_visible, outputs=output)

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
                metrics=['accuracy'])


def build_dataset(image_size, *, directory="img", batch_size=16,
                  validation_split=.20, seed=123):
    """Load the training and validation splits of an image directory.

    Args:
        image_size: size the images are resized to, forwarded as
            ``image_size``.
        directory: directory to read the images from. Defaults to ``"img"``.
        batch_size: batch size of the returned datasets. Defaults to 16.
        validation_split: fraction of the data reserved for validation.
            Defaults to 0.20.
        seed: seed forwarded to the loader. Defaults to 123.

    Returns:
        Whatever ``image_dataset_from_directory`` returns for
        ``subset="both"``; the caller unpacks it as
        ``(train_ds, val_ds)``.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        validation_split=validation_split,
        subset="both",
        label_mode="categorical",
        # Seed needs to provided when using validation_split and shuffle = True.
        # A fixed seed is used so that the validation set is stable across runs.
        seed=seed,
        image_size=image_size,
        batch_size=batch_size
    )

batch_size = 16

def gen_datasets(image_size):
    """Build the rescaled training and validation pipelines for one image size.

    Uses the module-level ``build_dataset`` and ``batch_size``.

    Args:
        image_size: image size forwarded to ``build_dataset``.

    Returns:
        A tuple ``(train_ds, val_ds, train_size, valid_size, class_names)``.
        ``train_ds`` is re-batched, repeated and rescaled; ``val_ds`` is
        re-batched and rescaled. ``train_size`` and ``valid_size`` are the
        ``cardinality()`` of the datasets exactly as ``build_dataset``
        returned them, i.e. already batched, so they count batches, not
        individual images.
    """
    train_ds, val_ds = build_dataset(image_size)
    class_names = tuple(train_ds.class_names)

    # Read both sizes before re-batching; see the docstring for their unit.
    train_size = train_ds.cardinality().numpy()
    valid_size = val_ds.cardinality().numpy()

    normalization_layer = tf.keras.layers.Rescaling(1. / 255)
    preprocessing_model = tf.keras.Sequential([normalization_layer])
    do_data_augmentation = False  # @param {type:"boolean"}
    if do_data_augmentation:
        # Every random layer is gated by the flag. Zoom and flip used to sit
        # outside this block and ran even with augmentation switched off.
        preprocessing_model.add(tf.keras.layers.RandomRotation(40))
        preprocessing_model.add(tf.keras.layers.RandomTranslation(0, 0.2))
        preprocessing_model.add(tf.keras.layers.RandomTranslation(0.2, 0))
        preprocessing_model.add(tf.keras.layers.RandomZoom(0.2, 0.2))
        preprocessing_model.add(tf.keras.layers.RandomFlip(mode="horizontal"))

    train_ds = train_ds.unbatch().batch(batch_size)
    train_ds = train_ds.repeat()
    train_ds = train_ds.map(lambda images, labels: (preprocessing_model(images), labels))

    val_ds = val_ds.unbatch().batch(batch_size) #self.batch_size)
    val_ds = val_ds.map(lambda images, labels:
                        (normalization_layer(images), labels))

    return train_ds, val_ds, train_size, valid_size, class_names


# Build one training/validation pipeline per input resolution. The sizes and
# class names returned by the second call overwrite those of the first.
train_ds_96, val_ds_96, train_size, valid_size, class_names = gen_datasets([96,96])
train_ds_224, val_ds_224, train_size, valid_size, class_names = gen_datasets([224,224])

print("aquired data")

# Split every (images, labels) pipeline into an image and a label pipeline.
train_x_96, train_y_96 = tfdata_unzip(train_ds_96)
train_x_224, train_y_224 = tfdata_unzip(train_ds_224)

val_x_96, val_y_96 = tfdata_unzip(val_ds_96)
val_x_224, val_y_224 = tfdata_unzip(val_ds_224)

model.summary()

# Feed both image streams (keyed by name) together with the 96x96 labels.
# NOTE(review): the dict keys presumably have to match the input names shown
# by model.summary() above -- confirm. Also confirm that the separately built
# pipelines really yield images and labels in the same order.
model_inputs = tf.data.Dataset.zip(({"inputmobilenet_v2_100_96":train_x_96, "inputmobilenet_v2_100_224":train_x_224}, train_y_96))
model_vals = tf.data.Dataset.zip(({"inputmobilenet_v2_100_96":val_x_96, "inputmobilenet_v2_100_224":val_x_224}, val_y_96))

# gen_datasets() takes train_size / valid_size from cardinality() of datasets
# that build_dataset() has already batched (16 per batch, same as batch_size),
# so they are batch counts. Dividing them by batch_size again would cut every
# epoch down to roughly 1/16 of the data.
steps_per_epoch = train_size
validation_steps = valid_size
hist = model.fit(
    model_inputs,
    epochs=1,
    steps_per_epoch=steps_per_epoch,
    validation_data=model_vals,
    validation_steps=validation_steps).history

model.save("joined_model")

What I've tried so far

I've tried:

splitting the dataset with:

def fit_generator(dataset,len):
  """Materialise a dataset of (x, y) pairs into two NumPy arrays.

  Args:
    dataset: dataset handed to ``tfds.as_numpy``; each element is unpacked
      as an ``(x, y)`` pair.
    len: number of elements, used only as the progress-bar total.

  Returns:
    ``(X_ret, Y_ret)``. Both are empty arrays when the dataset yields
    nothing.
  """
  df = tfds.as_numpy(dataset)
  x_parts = []
  y_parts = []

  # np.append returns a new array instead of growing its argument, so the
  # previous loop threw every element away. Collect the pieces in lists and
  # join them once at the end, which also avoids re-copying per step.
  for a,b in tqdm(df, total=len):
    x_parts.append(a)
    y_parts.append(b)

  if not x_parts:
    return np.array([]), np.array([])

  # assumes every element is a batch, so pieces are joined along axis 0
  # (a smaller final batch is fine) -- TODO confirm for unbatched datasets
  X_ret = np.concatenate(x_parts, axis=0)
  Y_ret = np.concatenate(y_parts, axis=0)

  return X_ret, Y_ret

This was too slow to handle the amount of data.

I've also tried using a generator like this, but it was not accepted by the model.fit function:

def fit_generator(dataset,len):
  """Materialise a dataset of (x, y) pairs into two NumPy arrays.

  Args:
    dataset: dataset handed to ``tfds.as_numpy``; each element is indexed
      as ``e[0]`` (inputs) and ``e[1]`` (targets).
    len: unused; kept so existing calls keep working.

  Returns:
    ``(X_ret, Y_ret)``, each built with ``np.array`` from the list of all
    inputs / all targets, so every element needs the same shape.
  """
  # Passing a generator expression to np.array wraps the generator object in
  # a 0-d array instead of consuming it. Read the dataset exactly once into a
  # list so that inputs and targets come from the same pass.
  pairs = list(tfds.as_numpy(dataset))
  X_ret = np.array([e[0] for e in pairs])
  Y_ret = np.array([e[1] for e in pairs])

  return X_ret, Y_ret

Lastly, I've tried the answer referenced in the problem description, tf_unzip (https://stackoverflow.com/a/68661507/20617581).

But the model does not learn in any significant way.

Solution

I used the ImageDataGenerator.flow_from_directory() and used a custom Generator to supply the model.fit function:

class JoinedGen(tf.keras.utils.Sequence):
    """Sequence that joins several batch generators into one multi-input feed.

    Batch ``i`` consists of the inputs of batch ``i`` from every wrapped
    generator (as a list, in the order given) and the targets of the first
    generator.
    """

    def __init__(self, input_gens):
        """Store the generators to join.

        Args:
            input_gens: indexable collection of generators; each must support
                ``len()``, ``gen[i] -> (x, y)`` and ``on_epoch_end()``.
        """
        super().__init__()
        self.gens = input_gens

    def __len__(self):
        """Return the number of batches, taken from the first generator."""
        return len(self.gens[0])

    def __getitem__(self, i):
        """Return ``([x_0, x_1, ...], y)`` for batch ``i``."""
        # Fetch every generator's batch exactly once. Indexing gens[0] a
        # second time just to read the labels would make it produce that
        # batch again.
        batches = [gen[i] for gen in self.gens]
        x = [batch[0] for batch in batches]
        y = batches[0][1]

        return x, y

    def on_epoch_end(self):
        """Forward the end-of-epoch hook to every wrapped generator."""
        # NOTE(review): if the wrapped generators reshuffle here, confirm that
        # they all end up with the same order, otherwise inputs and labels
        # drift apart after the first epoch.
        for gen in self.gens:
            gen.on_epoch_end()

def _build_generators(self):
        """Create joined training and validation generators for all input sizes.

        Reads ``self.validation_split`` and ``self.batch_size``.

        Returns:
            ``(train, val)``: two ``JoinedGen`` instances, each wrapping one
            directory generator per entry in ``sizes``.
        """
        train_datagen = ImageDataGenerator(
            rescale=1 / 255.0,
            rotation_range=20,
            zoom_range=0.05,
            width_shift_range=0.05,
            height_shift_range=0.05,
            shear_range=0.05,
            horizontal_flip=True,
            fill_mode="nearest",
            validation_split=self.validation_split)

        # Validation images are only rescaled. Drawing them from the
        # augmenting generator above would rotate/zoom/shift/flip the
        # held-out data too. The same validation_split keeps the
        # train/validation partition unchanged.
        val_datagen = ImageDataGenerator(
            rescale=1 / 255.0,
            validation_split=self.validation_split)

        # One entry per model input, in input order.
        sizes = [(96, 96), (224, 224), (299, 299), (224, 224), (160, 160)]
        train_gens = []
        val_gens = []

        for size in sizes:
            train_gens.append(
                train_datagen.flow_from_directory(
                    directory="img_enhanced",
                    target_size=size,
                    color_mode="rgb",
                    batch_size=self.batch_size,
                    class_mode="categorical",
                    subset='training',
                    shuffle=True,
                    seed=42
                )
            )
            val_gens.append(
                val_datagen.flow_from_directory(
                    directory="img_enhanced",
                    target_size=size,
                    color_mode="rgb",
                    batch_size=self.batch_size,
                    class_mode="categorical",
                    subset='validation',
                    shuffle=True,
                    seed=42
                )
            )
        
        return JoinedGen(train_gens), JoinedGen(val_gens)

        # Joined multi-input generators for training and validation.
        self.train_gens, self.val_gens = self._build_generators()


        model.fit(
            x=self.train_gens,
            epochs=1,
            steps_per_epoch=steps_per_epoch,
            # Validate on the held-out generators; passing self.train_gens
            # here would report training performance as validation
            # performance.
            validation_data=self.val_gens,
            validation_steps=validation_steps,
            # No batch_size: the generators already yield complete batches,
            # and Keras asks not to pass it for Sequence inputs.
        )