I want to do multi-class classification on time-series data. The dataset I have needs heavy preprocessing, so just to get an idea of how to implement the model I have used the Iris dataset (not suitable for an LSTM), since it has the same structure as my time-series data (4 input features, 1 output feature, 120 samples). I have implemented the following code, but it gives me an invalid-shape error when fitting the model with a batch size of 5 (I changed the batch size several times, but that didn't seem to make any difference).
# load dataset
import pandas
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

dataframe = pandas.read_csv("iris.csv", header=None)
dataset = dataframe.values
X = dataset[:, 0:4].astype(float)
Y = dataset[:, 4]
# Encode the output variables
encoder = LabelEncoder()
encoder.fit(Y)
# convert the string labels into integers
encoded_Y = encoder.transform(Y)
# convert the integers to dummy variables (one-hot encoding)
dummy_Y = np_utils.to_categorical(encoded_Y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, dummy_Y, test_size=0.2)  # 20% is held out for testing
X_train = X_train.reshape(60, 2, 4)
y_train = y_train.reshape(60, 2, 3)
y_train.shape,X_train.shape
((60, 2, 3), (60, 2, 4))
# Create the neural network model
from keras.models import Sequential
from keras.layers import LSTM, Dense

def create_nn_model():
    # create a sequential model
    model = Sequential()
    model.add(LSTM(100, dropout=0.2, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # compile the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
model = create_nn_model()
model.summary()
> Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm_1 (LSTM)                (None, 100)               42000
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 303
=================================================================
Total params: 52,403
Trainable params: 52,403
Non-trainable params: 0
model.fit(X_train,y_train,epochs=200,batch_size=5)
> ValueError Traceback (most recent call last)
<ipython-input-26-0aef33c299f0> in <module>()
----> 1 model.fit(X_train,y_train,epochs=200,batch_size=5) # X_train holds the independent variables; the dataset is trained in batches of the given size
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
984 except Exception as e: # pylint:disable=broad-except
985 if hasattr(e, "ag_error_metadata"):
--> 986 raise e.ag_error_metadata.to_exception(e)
987 else:
988 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:830 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:813 run_step *
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:771 train_step *
loss = self.compiled_loss(
/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py:201 __call__ *
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.7/dist-packages/keras/losses.py:142 __call__ *
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.7/dist-packages/keras/losses.py:246 call *
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper **
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/losses.py:1631 categorical_crossentropy
y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/backend.py:4827 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (5, 2, 3) and (5, 3) are incompatible
Your y_true and y_pred are not the same shape. You may need to define your LSTM in the following way:
model.add(LSTM(100,dropout=0.2, input_shape=(2,4), return_sequences=True))
....
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
....
dense_3 (Dense)              (None, 2, 3)              303    <---
=================================================================
Using return_sequences=True would work because you defined your training pairs that way:
X_train = X_train.reshape(60, 2, 4)
y_train = y_train.reshape(60, 2, 3)
which represents (batch_size, timestep, input_length); note, though, that you only need to reshape the input to satisfy the LSTM layer's requirements in your model above, not reshape y_train. As your model is currently defined, you don't use return_sequences, so the last layer outputs only the three class scores without a timestep axis, while your y_train keeps one. If you set return_sequences=True and print the model summary, you will see that the last layer has an output shape of (None, 2, 3), which exactly matches the shape of y_train.
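To make that concrete, here is a minimal sketch (my addition, assuming the (60, 2, 4) / (60, 2, 3) reshape from your question) of the model with return_sequences=True:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential()
# return_sequences=True keeps the timestep axis, so the layers after
# the LSTM see a 3-D tensor of shape (batch, 2, features)
model.add(LSTM(100, dropout=0.2, input_shape=(2, 4), return_sequences=True))
# Dense applies to the last axis of a 3-D input, so the output
# becomes (None, 2, 100) and then (None, 2, 3)
model.add(Dense(100, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.summary()  # last layer: (None, 2, 3), matching y_train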
Before getting into what return_sequences does here, you may need to know what a timestep means in an LSTM model; check this answer. AFAIK, it depends on how many timesteps you set for your input: the LSTM cell can be run for a single timestep or for multiple (N) timesteps. If I want the LSTM to return the output of every timestep (N outputs), I set return_sequences=True; otherwise return_sequences=False. From the docs:
return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. Default: False.
In short, if it is set to True, the full sequence is returned; if it's False, only the last output is. For example:
inputs = tf.random.normal([32, 8])
inputs = tf.reshape(inputs, [-1, 2, 4])  # or [-1, 4, 2], or [-1, 1, 8]
inputs.shape
TensorShape([32, 2, 4]) # (batch_size, timestep, input_length)
lstm = tf.keras.layers.LSTM(10, return_sequences=True)
whole_seq_output = lstm(inputs)
print(whole_seq_output.shape)
(32, 2, 10) # (batch_size, timestep, output_length)
lstm = tf.keras.layers.LSTM(10, return_sequences=False)
last_seq_output = lstm(inputs)
print(last_seq_output.shape)
(32, 10) # (batch_size, output_length)
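A related point (my note, not from the question, but a common follow-up): when stacking LSTM layers, every layer except the last one needs return_sequences=True, because the next LSTM expects a 3-D (batch, timestep, features) input:
import tensorflow as tf

stacked = tf.keras.Sequential([
    # the first LSTM returns the full sequence so that the second
    # LSTM still receives a 3-D (batch, timestep, features) tensor
    tf.keras.layers.LSTM(16, return_sequences=True, input_shape=(2, 4)),
    tf.keras.layers.LSTM(8),  # last LSTM: only the final output, (None, 8)
    tf.keras.layers.Dense(3, activation='softmax'),
])
print(stacked.output_shape)  # (None, 3)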
Here is one possible approach for your code above. The Iris data is taken from here.
import pandas
dataframe = pandas.read_csv("/content/iris.csv")
dataframe.head(3)
sepal.length sepal.width petal.length petal.width variety
0 5.1 3.5 1.4 0.2 Setosa
1 4.9 3.0 1.4 0.2 Setosa
2 4.7 3.2 1.3 0.2 Setosa
dataframe.variety.unique()
array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)
target_map = dict(zip(dataframe['variety'].unique(), [0, 1, 2]))
target_map
{'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}
dataframe['target'] = dataframe.variety.map(target_map)
dataframe.sample()
sepal.length sepal.width petal.length petal.width variety target
128 6.4 2.8 5.6 2.1 Virginica 2
X = dataframe.iloc[:, :4]
Y = dataframe.iloc[:, 5]
X.shape, Y.shape
((150, 4), (150,))
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
OHE_Y = to_categorical(Y, num_classes=3)
X_train, X_test, y_train, y_test = train_test_split(X, OHE_Y, test_size=0.2)
X_train.shape
(120, 4)
# make it an LSTM-compatible input: (samples, timesteps, features)
X_train = X_train.values.reshape(-1, 1, 4)
X_train.shape ,y_train.shape
((120, 1, 4), (120, 3))
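As an aside, since your real data is a time series, you would normally build this 3-D input from sliding windows rather than a single-timestep reshape. A rough sketch (the window length and array names here are hypothetical, just for illustration):
import numpy as np

def make_windows(features, labels, timesteps=5):
    # stack `timesteps` consecutive rows of the feature array into one
    # sample of shape (timesteps, n_features); use the label of the
    # last row in each window as that window's target
    xs, ys = [], []
    for i in range(len(features) - timesteps + 1):
        xs.append(features[i:i + timesteps])
        ys.append(labels[i + timesteps - 1])
    return np.array(xs), np.array(ys)  # (n, timesteps, n_features), (n,)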
Model
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense
def create_nn_model():
    model = Sequential()
    model.add(LSTM(100, dropout=0.2,
                   input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    return model
model = create_nn_model()
model.summary()
model.fit(X_train, y_train, epochs=10, batch_size=5)
...
Epoch 9/10
3ms/step - loss: 0.5224 - accuracy: 0.7243
Epoch 10/10
3ms/step - loss: 0.5568 - accuracy: 0.7833
Inference
model.evaluate(X_train, y_train)
4ms/step - loss: 0.3843 - accuracy: 0.9583
[0.38432881236076355, 0.9583333134651184]
y_pred = model.predict(X_train).argmax(-1)
y_pred
array([2, 1, 1, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0, 1, 1, 1, 0, 1, 0, 0, 2, 0,
0, 2, 2, 0, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 2, 0, 2, 2, 0, 2, 0, 0,
1, 1, 2, 0, 1, 2, 1, 2, 0, 0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0,
2, 2, 0, 2, 1, 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 1, 0, 2, 0,
0, 2, 1, 0, 2, 1, 1, 1, 1, 2, 1, 0, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2,
0, 1, 2, 1, 0, 0, 2, 1, 2, 0])
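Note that the evaluation and predictions above are on the training split. To check generalisation, you would reshape X_test the same way and evaluate on it (a sketch reusing the variables defined above):
X_test = X_test.values.reshape(-1, 1, 4)  # same (samples, 1, 4) layout as X_train
model.evaluate(X_test, y_test)
y_test_pred = model.predict(X_test).argmax(-1)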