enter image description here Getting issues with my code unable to understand what to do next can anyone help me out
# Importing the libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import pickle
import re
# Importing the dataset
filename = "MoviePlots.csv"
data = pd.read_csv(filename, encoding= 'unicode_escape')
# Keeping only the neccessary columns
data = data[['Plot']]
# Clean the data
data['Plot'] = data['Plot'].apply(lambda x: x.lower())
data['Plot'] = data['Plot'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))
# Create the tokenizer
tokenizer = Tokenizer(num_words=5000, split=" ")
tokenizer.fit_on_texts(data['Plot'].values)
# Save the tokenizer
with open('tokenizer.pickle', 'wb') as handle:
pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Create the sequences
X = tokenizer.texts_to_sequences(data['Plot'].values)
X = pad_sequences(X)
# Create the model
model = Sequential()
model.add(Embedding(5000, 256, input_length=X.shape[1]))
model.add(Bidirectional(LSTM(256, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)))
model.add(LSTM(256, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))
model.add(LSTM(256, dropout=0.1, recurrent_dropout=0.1))
model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(5000, activation='softmax'))
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01), metrics=['accuracy'])
# Train the model
model.fit(X, X, epochs=100, batch_size=128, verbose=1)
# Saving the model
model.save('visioniser.h5')
This is my code and error in the image attached
Anyone please help me out solve this problem of my code please diagnose it
It appears that the error is happening with data['Plot'] = data['Plot'].apply(lambda x: x.lower())
(you are calling the apply
function on a column of data -> one of the values in the column is not a string so it doesn't have the lower
method)!
You could fix this by checking if the instance is actually of type string:
data['Plot'] = data['Plot'].apply(lambda x: x.lower() if isinstance(x, str) else x)
or instead of using a lambda function:
data['Plot'] = data['Plot'].str.lower()
whereas panda
´s str.lower
skips values that are not strings!