python tensorflow keras deep-learning heatmap

tf.GradientTape().gradient() is returning None

I am trying to generate heatmaps of an input image for a model that I have created using the pretrained tensorflow XceptionNet.

My model structure is:

from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Dense, Flatten, Dropout, AveragePooling2D, Concatenate, GlobalAveragePooling2D, BatchNormalization, ReLU, Add, SeparableConv2D
from tensorflow.keras.applications import Xception

def xception(img_shape, n_classes):
    """Build a classifier head on a frozen ImageNet-pretrained Xception.

    Args:
        img_shape: Input image shape passed to both ``Xception`` and
            ``Input``, e.g. ``(256, 256, 3)``.
        n_classes: Number of units in the final softmax ``Dense`` layer.

    Returns:
        An uncompiled ``Model`` mapping an image batch to class
        probabilities.
    """
    backbone = Xception(input_shape=img_shape, include_top=False, weights='imagenet')
    # Freeze the pretrained weights; only the layers added below stay trainable.
    backbone.trainable = False

    # Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
    inputs = Input(img_shape)
    # The backbone is always called with training=False.
    features = backbone(inputs, training=False)
    features = GlobalAveragePooling2D()(features)
    features = Dropout(rate=0.2)(features)

    outputs = Dense(n_classes, activation='softmax')(features)

    return Model(inputs, outputs)

# Three output classes over 256x256 inputs with 3 channels.
n_classes = 3
input_shape = (256, 256, 3)

model = xception(img_shape=input_shape, n_classes=n_classes)
model.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model Structure [Output of model.summary()]

I have tried to use the same format as mentioned in the keras documentation (https://keras.io/examples/vision/grad_cam/) to generate the heatmaps for an image in my dataset.

Based on the documentation, the part that displays the image for my model is:

from IPython.display import Image, display
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Image shape, including the channel dimension.
img_size = (256, 256, 3)
# NOTE(review): `keras` is not imported in the visible snippet — presumably
# `from tensorflow import keras`; confirm.
preprocess_input = keras.applications.xception.preprocess_input
decode_predictions = keras.applications.xception.decode_predictions

# Layer name later handed to model.get_layer() in make_gradcam_heatmap.
last_conv_layer_name = "xception"

# The local path to our target image
img_path = '/content/drive/My Drive/Colab Notebooks/data/Malignant/Malignant case (1).jpg'

display(Image(img_path))

The part above works perfectly.

But now when I am executing the part:

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM style heatmap for ``img_array``.

    Args:
        img_array: Input batch fed to the gradient model.
        model: Model containing a layer named ``last_conv_layer_name``.
        last_conv_layer_name: Name of the layer whose output is used as the
            feature map.
        pred_index: Class index to explain; defaults to the arg-max of the
            first prediction.

    Returns:
        The normalized heatmap as a NumPy array.
    """
    # First, we create a model that maps the input image to the activations
    # of the last conv layer as well as the output predictions

    last_conv_layer = model.get_layer(last_conv_layer_name)
    new_model = tf.keras.models.Sequential()
    for layer in model.layers[:model.layers.index(last_conv_layer)+1]:
        new_model.add(layer)
    new_model.add(tf.keras.layers.Flatten())
    grad_model = tf.keras.models.Model(inputs=[new_model.input], outputs=[new_model.output, model.output])

    
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        # NOTE(review): this rebinds the name to a new tensor produced by
        # tf.reshape; `preds` was already computed on the line above and does
        # not depend on it, which looks like why tape.gradient() below
        # returns None — confirm.
        last_conv_layer_output = tf.reshape(last_conv_layer_output, shape=(8, 8, 2048))
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # This is the gradient of the output neuron (top predicted or chosen)
    # with regard to the output feature map of the last conv layer
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # This is a vector where each entry is the mean intensity of the gradient
    # over a specific feature map channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # We multiply each channel in the feature map array
    # by "how important this channel is" with regard to the top predicted class
    # then sum all the channels to obtain the heatmap class activation
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # For visualization purpose, we will also normalize the heatmap between 0 & 1
    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
    return heatmap.numpy()

from tensorflow.keras.models import load_model

# Prepare image
# NOTE(review): get_img_array is not defined in the visible snippet —
# presumably the helper from the Keras Grad-CAM example; confirm.
img_array = preprocess_input(get_img_array(img_path, size=img_size))

# Make model
model = load_model('/content/drive/My Drive/Colab Notebooks/models/imageclassifier1.h5')

# Remove last layer's softmax
model.layers[-1].activation = None

# Print what the top predicted class is
preds = model.predict(img_array)
# Keep only the index of the highest score for the first image.
# NOTE(review): `np` is not imported in the visible snippet — presumably
# `import numpy as np`; confirm.
preds = np.argmax(preds[0])

# Map class indices to display names.
labels = { 0 : "Cat",
           1 : "Dog",
           2 : "Human"}

# print("Predicted:", decode_predictions(preds, top=1)[0])  # For pre-trained models
print("Predicted:", labels[preds])

# Generate class activation heatmap
heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)

# Display heatmap
plt.matshow(heatmap)
plt.show()

I am getting an error on the line `pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))` because `grads` is None.

Below is the error message that I am receiving:

ValueError                                Traceback (most recent call last)
<ipython-input-86-e8872c6548f7> in <cell line: 25>()
     23 
     24 # Generate class activation heatmap
---> 25 heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)
     26 
     27 # Display heatmap

<ipython-input-84-cf963b881b8a> in make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index)
     50     # This is a vector where each entry is the mean intensity of the gradient
     51     # over a specific feature map channel
---> 52     pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
     53     print(pooled_grads)
     54 

/usr/local/lib/python3.9/dist-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs)
    151     except Exception as e:
    152       filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153       raise e.with_traceback(filtered_tb) from None
    154     finally:
    155       del filtered_tb

/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
    101       dtype = dtypes.as_dtype(dtype).as_datatype_enum
    102   ctx.ensure_initialized()
--> 103   return ops.EagerTensor(value, ctx.device_name, dtype)
    104 
    105 

ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.

I am pretty new to this, so could someone please help?

Solution

It is throwing an error because your model has a nested structure (the xception model inside the bigger classification model).

You can work around this by rebuilding the model: pass a fresh input through the layers again. Take the output of the xception layer as the last convolutional output, then pass that tensor through the remaining layers to get the final output. Finally, combine both outputs in a new model like this:

# Rebuild the forward pass on a fresh input so that the backbone features and
# the final predictions are both outputs of one model.
inputs = keras.Input((256, 256, 3))
backbone = model.get_layer("xception")
last_conv_output = backbone(inputs)

# Chain the backbone features through every layer from index 2 onwards.
output = last_conv_output
for head_layer in model.layers[2:]:
    output = head_layer(output)

grad_model = keras.Model(inputs, [last_conv_output, output])

Generating the heatmap by refactoring the above code:

import tensorflow as tf

def make_gradcam_heatmap(inputs, grad_model, pred_index=None):
    """Compute a Grad-CAM heatmap for the first image in ``inputs``.

    Args:
        inputs: Image batch passed to ``grad_model``.
        grad_model: Model that returns ``[last_conv_layer_output, preds]``
            when called on ``inputs``.
        pred_index: Class index to explain. Defaults to the arg-max of the
            first prediction.

    Returns:
        The heatmap as a NumPy array: negative values are clipped to 0 and
        the result is divided by the heatmap maximum. If that maximum is
        exactly 0 the result is all zeros instead of NaN.

    Raises:
        ValueError: If the tape yields no gradient of the class score with
            respect to the last conv layer output.
    """
    # Record the forward pass so the class score can be differentiated with
    # respect to the feature map produced in the same call.
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(inputs)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # This is the gradient of the output neuron (top predicted or chosen)
    # with regard to the output feature map of the last conv layer
    grads = tape.gradient(class_channel, last_conv_layer_output)
    if grads is None:
        # Fail with an actionable message instead of the opaque
        # "convert a value (None) ... to a Tensor" error from reduce_mean.
        raise ValueError(
            "No gradient flows from the predictions to the last conv layer "
            "output; make sure grad_model computes both outputs in one "
            "connected graph."
        )

    # This is a vector where each entry is the mean intensity of the gradient
    # over a specific feature map channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # We multiply each channel in the feature map array
    # by "how important this channel is" with regard to the top predicted class
    # then sum all the channels to obtain the heatmap class activation
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # For visualization purpose, normalize the heatmap between 0 & 1.
    # divide_no_nan returns 0 where the denominator is 0, so a heatmap whose
    # maximum is 0 yields zeros rather than NaN.
    heatmap = tf.math.divide_no_nan(
        tf.maximum(heatmap, 0), tf.math.reduce_max(heatmap)
    )
    return heatmap.numpy()

The output:

inputs = tf.random.uniform((1, 256, 256, 3))
make_gradcam_heatmap(inputs, grad_model)

array([[0.        , 0.        , 0.        , 0.0019973 , 0.00224069,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.01608142, 0.13859619, 0.19170615,
        0.03831227, 0.        , 0.        ],
       [0.00344855, 0.15640435, 0.39062408, 0.57898533, 0.72229344,
        0.18632776, 0.08909718, 0.00205518],
       [0.05994121, 0.41158128, 0.55284446, 0.8489698 , 0.96675164,
        0.34517574, 0.30315596, 0.05326002],
       [0.07081833, 0.4438232 , 0.6151547 , 0.9064342 , 0.9261135 ,
        0.41782287, 0.34709153, 0.09646279],
       [0.00530773, 0.22800735, 0.52887404, 0.8523431 , 1.        ,
        0.5120882 , 0.23707563, 0.        ],
       [0.        , 0.03709193, 0.20877707, 0.5426089 , 0.53451735,
        0.24202193, 0.        , 0.        ],
       [0.        , 0.01366277, 0.03030644, 0.14712998, 0.19128165,
        0.        , 0.        , 0.        ]], dtype=float32)