python tensorflow keras google-colaboratory tensorflow-datasets

Keras Applications - VGG16 low accuracy on ImageNet

I'm trying to replicate the performance of VGG-16 mentioned here: https://github.com/keras-team/keras-applications

However, when I evaluate the model on the ImageNet validation split from TensorFlow Datasets, I only get a top-5 accuracy of 0.866, which is lower than the value reported there.

This is my code:

import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras import applications
import tensorflow.keras.applications.vgg16 as vgg16

def scale16(image, label):
  """Prepare one (image, label) example for VGG16.

  The image is cast to float32, resized directly to 224x224 with
  tf.image.resize, and then passed through vgg16.preprocess_input.
  The label is returned unchanged.
  """
  pixels = tf.cast(image, tf.float32)
  resized = tf.image.resize(pixels, (224, 224))
  return vgg16.preprocess_input(resized), label

def batch_set(dataset, batch_size, map_fn=None):
    """Build the input pipeline for a single dataset split.

    Args:
        dataset: tf.data.Dataset yielding (image, label) pairs.
        batch_size: number of examples per batch.
        map_fn: per-example preprocessing function taking (image, label)
            and returning the transformed pair. Defaults to scale16 when
            omitted, which keeps existing two-argument calls working.

    Returns:
        The dataset after mapping, shuffling (buffer of 1000 examples),
        batching and prefetching.
    """
    if map_fn is None:
        map_fn = scale16
    return (
        dataset.map(map_fn)
        .shuffle(1000)
        .batch(batch_size)
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    )


def create_batched_datasets(map_fn, data_dir="/content", batch_size=64):
    """Load imagenet2012 from TFDS and return batched train/validation sets.

    Args:
        map_fn: per-example preprocessing function applied to both splits.
            Previously this argument was accepted but ignored; it is now
            forwarded to batch_set.
        data_dir: directory holding the already-prepared TFDS data
            (download=False, so nothing is fetched).
        batch_size: number of examples per batch.

    Returns:
        A (train, val, info) tuple: the two batched datasets and the
        DatasetInfo object returned by tfds.load.
    """
    datasets, info = tfds.load(
        name="imagenet2012",
        with_info=True,
        as_supervised=True,
        download=False,
        data_dir=data_dir,
    )
    train = batch_set(datasets['train'], batch_size, map_fn)
    val = batch_set(datasets['validation'], batch_size, map_fn)
    return train, val, info


train, test_dataset, info = create_batched_datasets(scale16)

model = vgg16.VGG16(weights='imagenet', include_top=True)

# The dataset is loaded with as_supervised=True and the metrics below are the
# sparse_* variants, i.e. labels are integer class ids rather than one-hot
# vectors. The loss therefore has to be the sparse variant as well;
# 'categorical_crossentropy' would report a meaningless loss value here.
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=['sparse_categorical_accuracy',
                       'sparse_top_k_categorical_accuracy'])

model.evaluate(test_dataset)

What am I missing? I'm running the code on Google Colab.

Solution

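The code does not preprocess the images correctly. The tf.image.resize() call scales the whole image down to 224x224, which distorts its aspect ratio. According to the Keras website, however, the 224x224x3 inputs should be created by a center crop. The version below first resizes the image so that its shorter side is 256 pixels and then takes a central 224x224 crop; changing the scale16() method to use it solves the problem: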


def resize_image(image, shape = (224,224), resize_side = 256):
  """Resize so the shorter side equals `resize_side`, then center-crop.

  Args:
    image: a single image tensor whose first two axes are height and width
      (axis 0 and axis 1 are the ones being resized and cropped).
    shape: target size of the crop as (height, width); shape[0] is applied
      to axis 0 and shape[1] to axis 1.
    resize_side: integer length, in pixels, that the shorter side is scaled
      to before cropping. The default of 256 is the value that used to be
      hard-coded.

  Returns:
    The bicubically resized image, cropped around its center to `shape`.
  """
  target_height = shape[0]
  target_width = shape[1]

  dims = tf.shape(image)
  height = dims[0]
  width = dims[1]

  # Scale both sides by resize_side / shorter_side using integer arithmetic:
  # the shorter side becomes exactly resize_side and the longer side keeps
  # the aspect ratio. tf.minimum avoids a Python `if` on a tensor (which
  # only works under AutoGraph), and integer sizes avoid relying on an
  # implicit float -> int32 conversion inside tf.image.resize.
  short_side = tf.minimum(height, width)
  new_height = height * resize_side // short_side
  new_width = width * resize_side // short_side
  resized = tf.image.resize(image, (new_height, new_width), method="bicubic")

  # Top-left corner of the centered crop window.
  offset_height = new_height // 2 - target_height // 2
  offset_width = new_width // 2 - target_width // 2
  return tf.image.crop_to_bounding_box(
      resized, offset_height, offset_width, target_height, target_width)

def scale16(image, label):
  """Prepare one (image, label) example for VGG16.

  The image is cast to float32, brought to 224x224 by resize_image, and
  then passed through vgg16.preprocess_input. The label is returned
  unchanged.
  """
  pixels = tf.cast(image, tf.float32)
  cropped = resize_image(pixels, (224, 224))
  return vgg16.preprocess_input(cropped), label