I'm trying to create a 2-layer LSTM (incl. dropout) but get an error message saying 'inputs must be a sequence'.
I use embeddings as the input and am not sure how to turn them into a sequence. Any explanations are greatly appreciated.
This is my graph definition:
# Graph inputs: token ids, labels (one column per class) and the dropout
# keep probability, all fed at run time.
with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(dtype=tf.int32, shape=[None, n_steps], name='input_x')
    input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='input_y')
    dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')

# Trainable embedding table, initialised uniformly between -1.0 and 1.0.
with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(
        tf.random_uniform(shape=[vocab_size, EMBEDDING_DIM], minval=-1.0, maxval=1.0),
        trainable=True,
    )
    # One embedding vector per token id in input_x.
    embedded_chars = tf.nn.embedding_lookup(params=embeddings_var, ids=input_x)
print(embedded_chars, 'embed')
def get_a_cell(lstm_size, keep_prob):
    """Build one LSTM layer whose outputs go through dropout.

    Args:
        lstm_size: Number of units passed to ``BasicLSTMCell``.
        keep_prob: Output keep probability passed to ``DropoutWrapper``
            (a float or a scalar tensor/placeholder).

    Returns:
        The ``DropoutWrapper`` around the newly created LSTM cell.
    """
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    # Use the argument, not the module-level ``dropout_keep_prob``, so the
    # caller actually controls this cell's dropout.
    return tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)
# Stacks num_layers dropout-wrapped LSTM cells and runs them over the embeddings.
with tf.name_scope('lstm'):
cell = tf.nn.rnn_cell.MultiRNNCell(
[get_a_cell(num_hidden, dropout_keep_prob) for _ in range(num_layers)]
)
# NOTE(review): this is the call that reports 'inputs must be a sequence':
# embedded_chars is passed as one tensor, not as a list of per-step tensors.
lstm_outputs, state = tf.nn.static_rnn(cell=cell,inputs=embedded_chars, dtype=tf.float32)
# Linear layer mapping the LSTM output to n_classes logits.
with tf.name_scope('Fully_connected'):
W = tf.Variable(tf.truncated_normal([num_hidden, n_classes], stddev=0.1))
# NOTE(review): shape is a bare int here; the EDIT further down reports
# "TypeError: 'int' object is not iterable" on this variable.
b = tf.Variable(tf.constant(0.1, shape=n_classes))
# NOTE(review): lstm_outputs is used as-is; with dynamic_rnn the reported
# error shows it is rank 3 ([?,27,128]) while W is rank 2 ([128,6]).
output = tf.nn.xw_plus_b(lstm_outputs,W,b)
# Index of the largest logit along axis 1.
predictions = tf.argmax(output, 1, name='predictions')
with tf.name_scope('Loss'):
# Cross-entropy loss and optimizer initialization
loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=input_y))
# Step counter incremented by minimize(); excluded from training.
global_step = tf.Variable(0, name="global_step", trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss1, global_step=global_step)
with tf.name_scope('Accuracy'):
# Accuracy metrics
# NOTE(review): rounds the softmax and compares it element-wise with
# input_y, then averages over every element -- an element match rate, not
# the share of correctly classified rows (compare num_correct below).
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.nn.softmax(output)), input_y), tf.float32))
with tf.name_scope('num_correct'):
# Counts rows whose predicted index equals the argmax of input_y.
correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
num_correct = tf.reduce_sum(tf.cast(correct_predictions, 'float'), name='num_correct')
EDIT: when changing static_rnn to dynamic_rnn the error message changes to the following, failing on the bias (b) variable:
TypeError: 'int' object is not iterable
After I changed the bias term to this:
# Bias with an explicit list-valued shape, drawn from a normal distribution.
b = tf.Variable(tf.random_normal(shape=[n_classes]))
I get a new error message:
ValueError: Shape must be rank 2 but is rank 3 for 'Fully_connected/xw_plus_b/MatMul' (op: 'MatMul') with input shapes: [?,27,128], [128,6].
Let's assume you use tf.nn.dynamic_rnn.
(In the tf.nn.static_rnn case, the first error occurs because the input is not in the right format: tf.nn.static_rnn
expects a sequence of tensors, i.e. a Python list of seq_len tensors, each of shape [batch_size x dim],
and not a single tensor of shape [batch_size x seq_len x dim],
whereas tf.nn.dynamic_rnn
accepts such a tensor directly. For example, tf.unstack(embedded_chars, axis=1) would produce the list that tf.nn.static_rnn needs.)
I invite you to read the documentation of tf.nn.dynamic_rnn: for your classification problem you probably do not want to use lstm_outputs
but state,
which basically contains the last output of your RNN. lstm_outputs contains the outputs of all time steps, whereas here you are only interested in the last one (unless you want to do something like attention for classification, in which case you need all the outputs).
To get the last output you basically need to do this:
# dynamic_rnn takes the whole embedded batch as a single tensor.
lstm_outputs, state = tf.nn.dynamic_rnn(
    cell=cell,
    inputs=embedded_chars,
    dtype=tf.float32,
)
# state[-1] is the state of the last stacked cell; .h is its hidden state.
last_output = state[-1].h
Here state[-1]
takes the state of the last cell in the stack, and its h field
contains the last output. Pass last_output
to your feed-forward network.
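(Working graph. Caveat, see comments: accuracy must be computed by comparing argmax class indices; rounding the softmax and comparing it element-wise with the one-hot labels reports a wrong accuracy.)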
# Hyper-parameters of the example graph.
n_classes = 6        # number of output classes
n_steps = 27         # time steps (tokens) per input sequence
num_hidden = 128     # units in each LSTM layer
# Keep probability to feed into the dropout placeholder while training.
# It has its own name because `dropout_keep_prob` is bound to a
# tf.placeholder in the graph definition, which would shadow a float
# stored under that name.
train_keep_prob = 0.5
vocab_size = 10000   # rows of the embedding table
EMBEDDING_DIM = 300  # width of each embedding vector
num_layers = 2       # stacked LSTM layers
# Graph inputs: token ids, labels (one column per class) and the dropout
# keep probability, all fed at run time.
with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(dtype=tf.int32, shape=[None, n_steps], name='input_x')
    input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='input_y')
    dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')

# Trainable embedding table, initialised uniformly between -1.0 and 1.0.
with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(
        tf.random_uniform(shape=[vocab_size, EMBEDDING_DIM], minval=-1.0, maxval=1.0),
        trainable=True,
    )
    # One embedding vector per token id in input_x.
    embedded_chars = tf.nn.embedding_lookup(params=embeddings_var, ids=input_x)
print(embedded_chars, 'embed')
def get_a_cell(lstm_size, keep_prob):
    """Build one LSTM layer whose outputs go through dropout.

    Args:
        lstm_size: Number of units passed to ``BasicLSTMCell``.
        keep_prob: Output keep probability passed to ``DropoutWrapper``
            (a float or a scalar tensor/placeholder).

    Returns:
        The ``DropoutWrapper`` around the newly created LSTM cell.
    """
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    # Use the argument, not the module-level ``dropout_keep_prob``, so the
    # caller actually controls this cell's dropout.
    return tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)
# Stack num_layers dropout-wrapped LSTM cells and run them over the sequence.
with tf.name_scope('lstm'):
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [get_a_cell(num_hidden, dropout_keep_prob) for _ in range(num_layers)]
    )
    lstm_outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=embedded_chars, dtype=tf.float32)
    # state holds one entry per stacked cell; use the hidden state of the
    # last cell after the final time step as the sequence summary.
    last_output = state[-1].h

# Linear layer mapping the sequence summary to n_classes logits.
with tf.name_scope('Fully_connected'):
    W = tf.Variable(tf.truncated_normal([num_hidden, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[n_classes]))
    output = tf.nn.xw_plus_b(last_output, W, b)
    predictions = tf.argmax(output, 1, name='predictions')

with tf.name_scope('Loss'):
    # Cross-entropy loss and optimizer initialization
    loss1 = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=input_y)
    )
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss1, global_step=global_step)

with tf.name_scope('Accuracy'):
    # Share of rows whose predicted class index equals the labelled one.
    # Rounding the softmax and comparing it element-wise with input_y would
    # also count every matching zero as a hit and overstate the accuracy.
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(predictions, tf.argmax(input_y, 1)), tf.float32)
    )

with tf.name_scope('num_correct'):
    # Number of correctly classified rows in the batch.
    correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
    num_correct = tf.reduce_sum(tf.cast(correct_predictions, 'float'), name='num_correct')