Tags: python, tensorflow, keras, recurrent-neural-network, tensorflow2.x

ValueError: Unexpected result of `train_function` (Empty logs) when training an RNN


I am reproducing the examples from chapter 16 of Aurélien Géron's book Hands-On Machine Learning and ran into an error while trying to train a simple RNN model.

The error is the following:

ValueError: Unexpected result of `train_function` (Empty logs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`. 
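
For reference, the two debugging switches the message points to can be turned on like this (a minimal sketch; they only produce a clearer traceback, they do not fix the problem by themselves):

import tensorflow as tf

# Global switch: run all tf.function-compiled code eagerly while debugging
tf.config.run_functions_eagerly(True)
# Or per model, at compile time:
# model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', run_eagerly=True)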

The code used to retrieve and preprocess the data:

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, preprocessing, utils

AUTOTUNE = tf.data.AUTOTUNE

shakespeare_url = 'https://homl.info/shakespeare'
filepath = utils.get_file('shakespeare.txt', shakespeare_url)
with open(filepath) as f:
    shakespeare_text = f.read()

# Let's tokenize the text at the character level
tokenizer = preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts([shakespeare_text])

# Number of distinct characters
max_id = len(tokenizer.word_index)
# total number of characters
dataset_size = tokenizer.document_count

# Let's encode the full text and subtract 1 to get a range of 0-38 instead of 1-39
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1

# Let's use the first 90% of the data to train the model 
train_size = dataset_size * 90 // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

n_steps = 100
window_length = n_steps + 1 # 100 steps plus the target
dataset = dataset.window(window_length, shift=1, drop_remainder=True)
# Let's flatten the nested windows dataset into tensors we can pass to the model
dataset = dataset.flat_map(lambda window: window.batch(window_length))
# Let's shuffle the windows
batch_size = 32
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]), num_parallel_calls=AUTOTUNE)
# One-hot encode the inputs since there are relatively few distinct characters (39)
dataset = dataset.map(lambda x_batch, y_batch: (tf.one_hot(x_batch, depth=max_id), y_batch), num_parallel_calls=AUTOTUNE)
dataset = dataset.prefetch(AUTOTUNE)

Here is the code for the model:

model = models.Sequential([
    layers.GRU(128, return_sequences=True, input_shape=[None, max_id], dropout=0.2, recurrent_dropout=0.2),
    layers.GRU(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
    layers.TimeDistributed(layers.Dense(max_id, activation='softmax'))
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['sparse_categorical_crossentropy'])

history = model.fit(dataset, epochs=20)

Feel free to request more information if needed. Thanks in advance.


Solution

  • The problem is that tokenizer.document_count counts documents, not characters: since fit_on_texts was called with a single-element list, it returns 1. As a result dataset_size is 1, train_size is 1 * 90 // 100 == 0, and the dataset passed to model.fit is empty, which is what triggers the "Empty logs" error. Use the encoded array to get the true number of characters instead.
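
    A quick way to see the mismatch, as a minimal sketch on a hypothetical tiny string (assuming TF 2.x):

    import numpy as np
    import tensorflow as tf

    sample_text = 'to be or not to be'     # stand-in for the Shakespeare corpus
    tok = tf.keras.preprocessing.text.Tokenizer(char_level=True)
    tok.fit_on_texts([sample_text])
    print(tok.document_count)              # 1 -- the whole string counts as a single document
    [enc] = np.array(tok.texts_to_sequences([sample_text])) - 1
    print(enc.shape[0])                    # the number of characters, which is what we actually want
    print(enc.shape[0] * 90 // 100)        # a sensible train_size, instead of 1 * 90 // 100 == 0

    The full corrected code: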

    import tensorflow as tf
    import numpy as np
    
    shakespeare_url = 'https://homl.info/shakespeare'
    filepath = tf.keras.utils.get_file('shakespeare.txt', shakespeare_url)
    with open(filepath) as f:
        shakespeare_text = f.read()
    
    # Let's tokenize the text at the character level
    tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level=True)
    tokenizer.fit_on_texts([shakespeare_text])
    
    # Number of distinct characters
    max_id = len(tokenizer.word_index)
    
    # Let's encode the full text and subtract 1 to get a range of 0-38 instead of 1-39
    [encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1
    
    # Total number of characters (the true dataset size)
    dataset_size = encoded.shape[0]
    # Let's use the first 90% of the data to train the model
    train_size = dataset_size * 90 // 100
    dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
    
    n_steps = 100
    window_length = n_steps + 1 # 100 steps plus the target
    dataset = dataset.window(window_length, shift=1, drop_remainder=True)
    # Let's flatten the nested windows dataset into tensors we can pass to the model
    dataset = dataset.flat_map(lambda window: window.batch(window_length))
    # Let's shuffle the windows
    batch_size = 32
    dataset = dataset.shuffle(10000).batch(batch_size)
    dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]), num_parallel_calls=tf.data.AUTOTUNE)
    # One-hot encode the inputs since there are relatively few distinct characters (39)
    dataset = dataset.map(lambda x_batch, y_batch: (tf.one_hot(x_batch, depth=max_id), y_batch), num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    
    model = tf.keras.Sequential([
        tf.keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id], dropout=0.2, recurrent_dropout=0.2),
        tf.keras.layers.GRU(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(max_id, activation='softmax'))
    ])
    model.summary()
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['sparse_categorical_crossentropy'])
    
    history = model.fit(dataset, epochs=20)
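
    As a quick sanity check (assuming the corpus gives max_id == 39, as in the book), the first batch now has the expected shapes instead of the dataset being empty:

    for x_batch, y_batch in dataset.take(1):
        print(x_batch.shape)   # (32, 100, 39) -> (batch size, time steps, one-hot depth)
        print(y_batch.shape)   # (32, 100)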