Search code examples
python · tensorflow · keras · google-colaboratory · tensorflow-datasets

Keras Applications - VGG16 low Accuracy on imagenet


I'm trying to replicate the performance of VGG-16 mentioned here: https://github.com/keras-team/keras-applications

But when I run the model on the ImageNet dataset from TensorFlow Datasets, I get a lower top-5 accuracy of 0.866.

This is my code:

import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras import applications
import tensorflow.keras.applications.vgg16 as vgg16

def scale16(image, label):
  """Prepare one (image, label) example for the VGG16 model.

  The image is cast to float32, resized as a whole to 224x224 with
  ``tf.image.resize`` (no cropping is performed), and then passed through
  ``vgg16.preprocess_input``. The label is returned untouched.
  """
  as_float = tf.cast(image, tf.float32)
  resized = tf.image.resize(as_float, (224, 224))
  return vgg16.preprocess_input(resized), label

def batch_set(dataset, batch_size, map_fn=None):
    """Map, shuffle, batch and prefetch a dataset.

    Args:
        dataset: Object exposing the ``map``/``shuffle``/``batch``/``prefetch``
            chain (presumably a ``tf.data.Dataset`` -- confirm with caller).
        batch_size: Number of examples per batch.
        map_fn: Per-example function handed to ``dataset.map``. When omitted,
            the module-level ``scale16`` is used; it is looked up at call
            time so a later redefinition of ``scale16`` is picked up.

    Returns:
        The transformed dataset.
    """
    if map_fn is None:
        map_fn = scale16
    return (
        dataset.map(map_fn)
        .shuffle(1000)
        .batch(batch_size)
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    )


def create_batched_datasets(map_fn, data_dir="/content", batch_size=64):
    """Load ``imagenet2012`` via tfds and return batched train/validation sets.

    Args:
        map_fn: Per-example preprocessing function applied to both splits.
            Previously this argument was accepted but ignored; it is now
            forwarded to ``batch_set``. Passing ``None`` falls back to
            ``scale16``.
        data_dir: Directory tfds reads the already-prepared dataset from
            (``download=False``, so nothing is fetched).
        batch_size: Number of examples per batch.

    Returns:
        A ``(train, val, info)`` tuple: the two batched splits and the
        dataset info object returned by ``tfds.load``.
    """
    datasets, info = tfds.load(
        name="imagenet2012",
        with_info=True,
        as_supervised=True,
        download=False,
        data_dir=data_dir,
    )
    train = batch_set(datasets['train'], batch_size, map_fn)
    val = batch_set(datasets['validation'], batch_size, map_fn)
    return train, val, info


# Build the batched pipelines; only the validation split is evaluated below.
train, test_dataset, info = create_batched_datasets(scale16)

# VGG16 with the pretrained ImageNet weights and its classification head.
model = vgg16.VGG16(weights='imagenet', include_top=True)

# The metrics are the sparse variants (integer class-id labels), so the loss
# has to be the sparse cross-entropy as well; the one-hot
# 'categorical_crossentropy' would report a meaningless loss value.
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy', 'sparse_top_k_categorical_accuracy'],
)

model.evaluate(test_dataset)

What am I missing? I'm running the code on Google Colab.


Solution

  • The code does not preprocess the images correctly. The tf.image.resize() call scales the whole image down to 224x224, but according to the Keras website, the 224x224x3 inputs should be created by a center crop. Replacing the scale16() function with the following version solves the problem:

    
    def resize_image(image, shape=(224, 224), short_side=256):
      """Resize so the shorter side equals `short_side`, then center-crop.

      Axis 0 of the image tensor is treated as height and axis 1 as width,
      which is the layout `tf.image.resize` and
      `tf.image.crop_to_bounding_box` operate on.

      Args:
        image: Image tensor; assumed to be (height, width, channels) --
          confirm against the dataset being mapped.
        shape: (height, width) of the center crop that is returned.
        short_side: Length the shorter image side is resized to before
          cropping. Must be at least as large as both entries of `shape`
          for the crop to fit.

      Returns:
        The center crop of the bicubically resized image.
      """
      target_height = shape[0]
      target_width = shape[1]
      in_height = tf.shape(image)[0]
      in_width = tf.shape(image)[1]
      if in_height < in_width:
        # Height is the shorter side: pin it to short_side, scale the width.
        scale = tf.cast(short_side / in_height, tf.float32)
        # tf.image.resize takes an integer size, so the scaled length is
        # cast to int32 explicitly (the fractional part is dropped).
        new_width = tf.cast(tf.cast(in_width, tf.float32) * scale, tf.int32)
        im = tf.image.resize(image, (short_side, new_width), method="bicubic")
      else:
        # Width is the shorter (or equal) side: pin it, scale the height.
        scale = tf.cast(short_side / in_width, tf.float32)
        new_height = tf.cast(tf.cast(in_height, tf.float32) * scale, tf.int32)
        im = tf.image.resize(image, (new_height, short_side), method="bicubic")
      resized_height = tf.shape(im)[0]
      resized_width = tf.shape(im)[1]
      # Top-left corner of a crop window centered in the resized image.
      offset_height = resized_height // 2 - target_height // 2
      offset_width = resized_width // 2 - target_width // 2
      return tf.image.crop_to_bounding_box(
          im, offset_height, offset_width, target_height, target_width)
    
    def scale16(image, label):
      """Prepare one (image, label) example for the VGG16 model.

      The image is cast to float32, reduced to 224x224 by `resize_image`
      (resize followed by a center crop), and then passed through
      `vgg16.preprocess_input`. The label is returned untouched.
      """
      as_float = tf.cast(image, tf.float32)
      cropped = resize_image(as_float, (224, 224))
      return vgg16.preprocess_input(cropped), label