I am trying to extract the output of the last layer of a classification model trained on some data. The first layer is an Embedding
layer, followed by a BiLSTM, and then the output Dense layer. My code is shown below. I keep getting a 4D output (1, 38, 300, 300)
instead of a 3D one (1, 38, 300). Here 1 is the sample size, 38 is the maximum length of the sentence, and 300 is the word2vec vector length.
from keras import backend as K
from tensorflow.keras.models import load_model
import numpy as np
import gensim
# Reproduction script: build a (1, 38, 300) array of word vectors for one
# sentence and ask the trained model for the output of its third layer.

# NOTE(review): `word2vec` is bound to a filename string here, so the
# `word in word2vec` test below is a substring check against this path,
# not a vocabulary lookup.
word2vec = 'GoogleNews-vectors-negative300.txt'
# One sample, 38 positions, 300 values per position; unfilled slots stay zero.
x_matrix = np.zeros((1, 38, 300))
# Input line in "sentence,label" form.
sentene_label = 'the weather today was extremely unpredictable,0'
parts = sentene_label.split(',')
label = int(parts[1])
sentence = parts[0]
words = sentence.split(' ')
# Keep at most as many words as x_matrix has positions (38).
words = words[:x_matrix.shape[1]]
for j, word in enumerate(words):
if word in word2vec:
# x_matrix[0, j, :] = word2vec[word]
# NOTE(review): `loaded_model` is not defined anywhere in this snippet --
# presumably the loaded gensim word-vector model; confirm.
x_matrix[0, j, :] = loaded_model.word_vec(word)
model = load_model('TrainedModel.h5')
# Backend function from the first layer's input to the output of layers[2].
get_3rd_layer_output = K.function([model.layers[0].input], [model.layers[2].output])
# NOTE(review): the architecture listed further down starts with an Embedding
# layer of output size 300, so feeding this (1, 38, 300) array gives the
# (1, 38, 300, 300) tensor reported in the ValueError. The model presumably
# expects the integer-encoded sequences it was trained on -- confirm.
layer_output = get_3rd_layer_output(x_matrix)[0]
print("Layer Output Shape 1 : ", layer_output.shape)
I have cross-checked my code several times and I can't seem to figure out why the dimensions are wrong.
This is the traceback:
Traceback (most recent call last):
File "/usr/pkg/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3427, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-bb840b495480>", line 1, in <module>
runfile('/am/vuwstocoisnrin1.vuw.ac.nz/ecrg-solar/kosimadukwe/Data Augmentation/test.py', wdir='/am/vuwstocoisnrin1.vuw.ac.nz/ecrg-solar/kosimadukwe/Data Augmentation')
File "/am/embassy/vol/x6/jetbrains/apps/PyCharm-P/ch-0/201.7846.77/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/am/embassy/vol/x6/jetbrains/apps/PyCharm-P/ch-0/201.7846.77/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/am/vuwstocoisnrin1.vuw.ac.nz/ecrg-solar/kosimadukwe/Data Augmentation/test.py", line 451, in <module>
layer_output = get_3rd_layer_output(x_matrix)[0]
File "/usr/pkg/lib/python3.8/site-packages/tensorflow/python/keras/backend.py", line 4073, in func
outs = model(model_inputs)
File "/usr/pkg/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1012, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/pkg/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 424, in call
return self._run_internal_graph(
File "/usr/pkg/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 560, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/usr/pkg/lib/python3.8/site-packages/tensorflow/python/keras/layers/wrappers.py", line 539, in __call__
return super(Bidirectional, self).__call__(inputs, **kwargs)
File "/usr/pkg/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 998, in __call__
input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
File "/usr/pkg/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py", line 219, in assert_input_compatibility
raise ValueError('Input ' + str(input_index) + ' of layer ' +
ValueError: Input 0 of layer bidirectional_9 is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (1, 38, 300, 300)
The error is triggered on line
layer_output = get_3rd_layer_output(x_matrix)[0]
The shape of x_matrix before calling get_3rd_layer_output is
The shape of X matrix : (60, 38, 300)
TrainedModel's architecture:
# Classifier: Embedding -> bidirectional LSTM -> 3-unit dense output.
# The embedding is initialised from `embedding_matrix` and left trainable.
model = Sequential([
    Embedding(vocab_size, 300, input_length=38, weights=[embedding_matrix], trainable=True),
    Bidirectional(LSTM(100, dropout=0.2)),
    Dense(3, activation='sigmoid'),
])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adagrad',
    metrics=['accuracy'],
)
model.summary()

# Both callbacks monitor the validation loss (lower is better):
# `es` is the early-stopping callback, `mc` checkpoints the best model
# seen so far to 'TrainedModel.h5'.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    baseline=0.3,
    patience=100,
    verbose=1,
)
mc = ModelCheckpoint(
    'TrainedModel.h5',
    monitor='val_loss',
    mode='min',
    verbose=1,
    save_best_only=True,
)

# Train on the integer-encoded sequences, validating on the held-out split.
hist = model.fit(
    train_sequences,
    train_y,
    epochs=200,
    verbose=False,
    batch_size=100,
    validation_data=(val_sequences, val_y),
    callbacks=[es, mc],
)
TrainedModel's model.summary() output is:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_9 (Embedding) (None, 38, 300) 7370400
_________________________________________________________________
bidirectional_9 (Bidirection (None, 200) 320800
_________________________________________________________________
dense_9 (Dense) (None, 3) 603
=================================================================
Total params: 7,691,803
Trainable params: 7,691,803
Non-trainable params: 0
_________________________________________________________________
The correct way to get any intermediate layer output is to create a sub-model that expects the same input as your trained model. In your case, the error is raised because you pass the 3D embedding matrix to your trained model, whereas you have to pass the same kind of data you used for training (2D data with integer-encoded words).
Here is a dummy example showing how to correctly extract any intermediate output from your model.
Create dummy data:
# Dimensions of the toy problem.
vocab_size = 111      # number of distinct token ids
emb_size = 300        # length of each embedding vector
input_length = 38     # tokens per sample
n_sample = 50         # number of samples
n_classes = 3         # number of target classes

# Random stand-in for a pretrained embedding table, values drawn from [-1, 1).
embedding_matrix = np.random.uniform(low=-1, high=1, size=(vocab_size, emb_size))

# 2-D integer-encoded inputs (one token id per position) and integer class labels.
X = np.random.randint(low=0, high=vocab_size, size=(n_sample, input_length))
Y = np.random.randint(low=0, high=n_classes, size=(n_sample,))
Create model and train:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras import backend as K
# Same three-layer stack as the question's model, sized from the dummy
# settings: Embedding -> bidirectional LSTM -> dense output.
model = Sequential([
    Embedding(
        vocab_size,
        emb_size,
        input_length=input_length,
        weights=[embedding_matrix],
        trainable=True,
    ),
    Bidirectional(LSTM(100, dropout=0.2)),
    Dense(n_classes, activation='sigmoid'),
])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adagrad',
    metrics=['accuracy'],
)

# Trained with X: the 2-D integer-encoded array, not pre-embedded vectors.
model.fit(X, Y, epochs=3)
Get layer output:
# Index into model.layers of the layer whose output we want.
layer_id = 2

# Backend function mapping the model's input tensor to that layer's output.
get_layer_output = K.function(
    inputs=[model.layers[0].input],
    outputs=[model.layers[layer_id].output],
)

# Extract from X, i.e. the same integer-encoded array the model was trained
# with. One output tensor was requested, so unpack the single result.
(layer_output,) = get_layer_output(X)

# Equivalent formulation using a sub-model that shares the trained layers:
# sub_model = Model(model.input, model.layers[layer_id].output)
# layer_output = sub_model.predict(X)   # again extract from X