I'm trying to replicate the performance of VGG-16 mentioned here: https://github.com/keras-team/keras-applications
But when I run the model on the ImageNet dataset from TensorFlow Datasets, I get a lower top-5 accuracy of 0.866.
This is my code:
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras import applications
import tensorflow.keras.applications.vgg16 as vgg16
def scale16(image, label):
    """Turn one (image, label) pair into a VGG-16 input pair.

    The image is converted to float32, resized directly to 224x224 with
    ``tf.image.resize`` and then run through ``vgg16.preprocess_input``.
    The label is returned untouched.
    """
    as_float = tf.cast(image, tf.float32)
    resized = tf.image.resize(as_float, (224, 224))
    return vgg16.preprocess_input(resized), label
def batch_set(dataset, batch_size, map_fn=None):
    """Map, shuffle, batch and prefetch a dataset.

    Args:
        dataset: Dataset of (image, label) pairs.
        batch_size: Number of examples per batch.
        map_fn: Per-example preprocessing function. When omitted, the
            module-level ``scale16`` is used, which keeps existing
            two-argument calls working exactly as before.

    Returns:
        The preprocessed, shuffled, batched and prefetched dataset.
    """
    if map_fn is None:
        # Looked up at call time so a later redefinition of scale16 is honoured.
        map_fn = scale16
    return (
        dataset.map(map_fn)
        .shuffle(1000)
        .batch(batch_size)
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    )


def create_batched_datasets(map_fn, data_dir="/content", batch_size=64):
    """Load ImageNet-2012 from ``data_dir`` and build batched pipelines.

    Args:
        map_fn: Per-example preprocessing function applied to both splits.
            It used to be accepted and silently ignored; it is now
            forwarded to ``batch_set``.
        data_dir: Directory holding the already prepared dataset
            (``download=False``, so nothing is fetched).
        batch_size: Number of examples per batch.

    Returns:
        A ``(train, validation, info)`` tuple where ``info`` is the
        metadata object returned by ``tfds.load``.
    """
    datasets, info = tfds.load(
        name="imagenet2012",
        with_info=True,
        as_supervised=True,
        download=False,
        data_dir=data_dir,
    )
    train = batch_set(datasets['train'], batch_size, map_fn)
    val = batch_set(datasets['validation'], batch_size, map_fn)
    return train, val, info
# Build the input pipelines; only the validation split is evaluated below.
train, test_dataset, info = create_batched_datasets(scale16)

# Pretrained VGG-16 including its ImageNet classification head.
model = vgg16.VGG16(weights='imagenet', include_top=True)

# The metrics are the sparse variants, i.e. the labels are integer class ids,
# so the loss has to be the sparse cross-entropy as well; the dense
# 'categorical_crossentropy' expects one-hot labels and would report a
# meaningless loss value.
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy', 'sparse_top_k_categorical_accuracy'],
)
model.evaluate(test_dataset)
What am I missing? I'm running the code on Google Colab.
The code does not preprocess the images correctly. The tf.image.resize() call scales the whole image down to 224x224. But according to the Keras website, the 224x224x3 images should be created by a center crop. Changing the scale16() method so that it first resizes the shorter side to 256 pixels and then takes a central 224x224 crop solves the problem:
def resize_image(image, shape=(224, 224), resize_side=256):
    """Scale the shorter image side to ``resize_side``, then center-crop.

    Args:
        image: A single image tensor; axis 0 is read as its height and
            axis 1 as its width (the layout ``tf.image.resize`` uses for
            3-D input).
        shape: ``(height, width)`` of the returned crop.
        resize_side: Length in pixels the shorter side is scaled to before
            cropping. It should be at least as large as both entries of
            ``shape``, otherwise the crop does not fit.

    Returns:
        The central ``shape``-sized crop of the bicubically resized image.
    """
    target_height = shape[0]
    target_width = shape[1]
    in_height = tf.shape(image)[0]
    in_width = tf.shape(image)[1]

    # Keep the aspect ratio: the shorter side becomes exactly resize_side and
    # the longer side is scaled by the same factor. tf.image.resize wants an
    # int32 size, so the scaled length is cast explicitly (truncating).
    if in_height < in_width:
        scale = tf.cast(resize_side / in_height, tf.float32)
        new_width = tf.cast(tf.cast(in_width, tf.float32) * scale, tf.int32)
        resized = tf.image.resize(
            image, (resize_side, new_width), method="bicubic")
    else:
        scale = tf.cast(resize_side / in_width, tf.float32)
        new_height = tf.cast(tf.cast(in_height, tf.float32) * scale, tf.int32)
        resized = tf.image.resize(
            image, (new_height, resize_side), method="bicubic")

    # Offsets that place the crop window in the middle of the resized image.
    resized_height = tf.shape(resized)[0]
    resized_width = tf.shape(resized)[1]
    offset_height = resized_height // 2 - (target_height // 2)
    offset_width = resized_width // 2 - (target_width // 2)
    return tf.image.crop_to_bounding_box(
        resized, offset_height, offset_width, target_height, target_width)
def scale16(image, label):
    """Prepare one (image, label) pair for VGG-16 using a center crop.

    The image is converted to float32, reduced to 224x224 by
    ``resize_image`` and passed through ``vgg16.preprocess_input``.
    The label is handed back unchanged.
    """
    as_float = tf.cast(image, tf.float32)
    cropped = resize_image(as_float, (224, 224))
    return vgg16.preprocess_input(cropped), label