Search code examples
pythontensorflowmachine-learningkeraslstm

LSTM raises ValueError: Shapes (5, 2, 3) and (5, 3) are incompatible


I want to do multi-class classification on time-series data. The data set I have needs heavy preprocessing, so, just to get an idea of how to implement the model, I have used the Iris data set (not suitable for an LSTM), since it has exactly the same structure as my time-series data (4 input features, 1 output feature, 120 samples). I have implemented the following code, but it raises an invalid-shape error when fitting the model with a batch size of 5 (I changed the batch size many times, but it didn't seem to make any difference).

#load dataset
    dataframe = pandas.read_csv("iris.csv",header=None)
    dataset = dataframe.values
    X=dataset[:,0:4].astype(float)
    Y=dataset[:,4]
# Encode the output variables
    encoder = LabelEncoder()
    encoder.fit(Y)
    # convert output variables into the numbers
    encoded_Y = encoder.transform(Y)
    # Convert integers to dummy variables (one-hot encoded)
    dummy_Y = np_utils.to_categorical(encoded_Y)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,dummy_Y,test_size=0.2) #20% is allocated for the testing
X_train = X_train.reshape(60, 2, 4)
y_train = y_train.reshape(60, 2, 3)
y_train.shape,X_train.shape

((60, 2, 3), (60, 2, 4))


 # Create the Neural Network Model
def create_nn_model():
  """Build and compile the LSTM -> Dense -> Dense softmax classifier.

  The LSTM input shape is read from axes 1 and 2 of the module-level
  X_train, so X_train must already be reshaped before this is called.

  Returns:
    The compiled Sequential model.
  """
  timesteps, n_features = X_train.shape[1], X_train.shape[2]
  # Same three layers as before, passed to Sequential in order.
  model = Sequential([
    LSTM(100, dropout=0.2, input_shape=(timesteps, n_features)),
    Dense(100, activation='relu'),
    Dense(3, activation='softmax'),
  ])
  # Categorical cross-entropy loss, Adam optimizer, accuracy metric.
  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return model
model = create_nn_model()
model.summary()

> Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_1 (LSTM)                (None, 100)               42000     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 303       
=================================================================
Total params: 52,403
Trainable params: 52,403
Non-trainable params: 0
model.fit(X_train,y_train,epochs=200,batch_size=5)

> ValueError                                Traceback (most recent call last)

<ipython-input-26-0aef33c299f0> in <module>()
----> 1 model.fit(X_train,y_train,epochs=200,batch_size=5) #X_train is independant variables. based on the amount of the data set data set will be trained by breaking into batches

9 frames

/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    984           except Exception as e:  # pylint:disable=broad-except
    985             if hasattr(e, "ag_error_metadata"):
--> 986               raise e.ag_error_metadata.to_exception(e)
    987             else:
    988               raise

ValueError: in user code:

    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:830 train_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:813 run_step  *
        outputs = model.train_step(data)
    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:771 train_step  *
        loss = self.compiled_loss(
    /usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py:201 __call__  *
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /usr/local/lib/python3.7/dist-packages/keras/losses.py:142 __call__  *
        losses = call_fn(y_true, y_pred)
    /usr/local/lib/python3.7/dist-packages/keras/losses.py:246 call  *
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper  **
        return target(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/keras/losses.py:1631 categorical_crossentropy
        y_true, y_pred, from_logits=from_logits)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.7/dist-packages/keras/backend.py:4827 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (5, 2, 3) and (5, 3) are incompatible

Solution

  • Your y_true and y_pred do not have the same shape. You may need to define your LSTM in the following way:

    model.add(LSTM(100,dropout=0.2, input_shape=(2,4), return_sequences=True))
    ....
    
    Model: "sequential_1"
    _________________________________________________________________
    Layer (type)                 Output Shape              Param #   
    =================================================================
    ....
    dense_3 (Dense)              (None, 2, 3)              303        < ---
    =================================================================
    

    Update

    Using return_sequences=True would work because you define your training pairs in this way:

    X_train = X_train.reshape(60, 2, 4)
    y_train = y_train.reshape(60, 2, 3)
    

    which represent (batch_size, timestep, input_length). Note, however, that it is the input that needs to be reshaped to fulfill the input requirement of the LSTM layer in your model above, not y_train. When you define your model, you don't set return_sequences, so the last layer outputs only the three class scores without a timestep dimension, whereas your y_train does have one. If you set return_sequences to True and print your model summary, you will see that the last layer has an output shape of (None, 2, 3), which exactly matches the shape of y_train.

    Before understanding what return_sequences is doing here, you may need to know what a timestep means in an LSTM model; check this answer. AFAIK, it depends on how many timesteps you need to set for your input; there can be a single occurrence of the LSTM cell or multiple ones (one per timestep). For n timesteps (n ∈ {1, 2, 3, ..., N}), if I want the LSTM to return the output of every timestep (n outputs), I set return_sequences=True; otherwise, return_sequences=False. From the docs,

    return_sequences: Boolean. Whether to return the last output. in the output sequence, or the full sequence. Default: False.

    In short, if it is set to True, the outputs of all timesteps are returned, but if it is False, only the last output is. For example:

    inputs = tf.random.normal([32, 8])
    inputs = tf.reshape(inputs, [-1, 2, 4 ]) # or [-1, 4, 2] # or [-1, 1, 8]
    inputs.shape 
    TensorShape([32, 2, 4]) # (batch_size, timestep, input_length)
    
    lstm = tf.keras.layers.LSTM(10, return_sequences=True)
    whole_seq_output = lstm(inputs)
    print(whole_seq_output.shape)
    (32, 2, 10) # (batch_size, timestep, output_length)
    
    lstm = tf.keras.layers.LSTM(10, return_sequences=False)
    last_seq_output = lstm(inputs)
    print(last_seq_output.shape)
    (32, 10) # (batch_size, output_length)
    

    Here is one way to approach your code above. The Iris data was taken from here.

    import pandas 
    dataframe = pandas.read_csv("/content/iris.csv")
    dataframe.head(3)
    
      sepal.length  sepal.width petal.length    petal.width   variety
    0   5.1              3.5         1.4             0.2      Setosa
    1   4.9              3.0         1.4             0.2      Setosa
    2   4.7              3.2         1.3             0.2      Setosa
    
    dataframe.variety.unique()
    array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)
    
    target_map = dict(zip(list(dataframe['variety'].unique()), 
                         ([0, 1, 2])))
    target_map
    {'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}
    
    dataframe['target'] = dataframe.variety.map(target_map) 
    dataframe.sample()
        sepal.length    sepal.width petal.length  petal.width   variety   target
    128      6.4             2.8       5.6           2.1       Virginica    2
    
    X = dataframe.iloc[:, :4] 
    Y = dataframe.iloc[:, 5]
    
    X.shape, Y.shape
    ((150, 4), (150,))
    
    from tensorflow.keras.utils import to_categorical
    from sklearn.model_selection import train_test_split
    
    OHE_Y = to_categorical(Y, num_classes=3)
    X_train, X_test, y_train, y_test = train_test_split(X, OHE_Y, 
                                                          test_size=0.2)
    
    X_train.shape
    (120, 4)
    
    # make it lstm compatible input 
    X_train = X_train.values.reshape(-1, 1, 4)
    
    X_train.shape ,y_train.shape
    ((120, 1, 4), (120, 3))
    

    Model

    from tensorflow.keras import Sequential 
    from tensorflow.keras.layers import LSTM, Dense 
    
    def create_nn_model():
      """Return a compiled LSTM classifier with a 3-unit softmax output.

      The LSTM input shape is read from axes 1 and 2 of the module-level
      X_train, so X_train must already be reshaped before this is called.
      """
      n_steps, n_inputs = X_train.shape[1], X_train.shape[2]
      stack = (
          LSTM(100, dropout=0.2, input_shape=(n_steps, n_inputs)),
          Dense(100, activation='relu'),
          Dense(3, activation='softmax'),
      )
      model = Sequential()
      for layer in stack:
        model.add(layer)
      # Categorical cross-entropy loss, Adam optimizer, accuracy metric.
      model.compile(optimizer='adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
      return model
    
    model = create_nn_model()
    model.summary()
    
    model.fit(X_train, y_train, epochs=10,batch_size=5)
    
    ...
    Epoch 9/10
    3ms/step - loss: 0.5224 - accuracy: 0.7243
    Epoch 10/10
    3ms/step - loss: 0.5568 - accuracy: 0.7833
    

    Inference

    model.evaluate(X_train, y_train)
    4ms/step - loss: 0.3843 - accuracy: 0.9583
    [0.38432881236076355, 0.9583333134651184]
    
    y_pred = model.predict(X_train).argmax(-1)
    y_pred
    array([2, 1, 1, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0, 1, 1, 1, 0, 1, 0, 0, 2, 0,
           0, 2, 2, 0, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 2, 0, 2, 2, 0, 2, 0, 0,
           1, 1, 2, 0, 1, 2, 1, 2, 0, 0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0,
           2, 2, 0, 2, 1, 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 1, 0, 2, 0,
           0, 2, 1, 0, 2, 1, 1, 1, 1, 2, 1, 0, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2,
           0, 1, 2, 1, 0, 0, 2, 1, 2, 0])