Search code examples
python python-3.x tensorflow keras deep-learning

Tensorflow keras error AttributeError: 'tuple' object has no attribute 'lower'


I have the following code:

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

conversations = [  # (prompt, reply) pairs: every element is a tuple, not a str
    ("Hello", "Hi there!"),
    ("How are you?", "I'm doing well, thanks."),
    ("What's your name?", "I'm a chatbot."),
]

tokenizer = Tokenizer()
tokenizer.fit_on_texts(conversations)  # the reported AttributeError is raised here: .lower() is called on a tuple

vocab_size = len(tokenizer.word_index) + 1  # NOTE(review): +1 presumably keeps index 0 free for padding -- confirm

sequences = tokenizer.texts_to_sequences(conversations)  # the same list of tuples is passed here as well
max_sequence_len = max([len(seq) for seq in sequences])

# Intended to separate prompts (X) from replies (y) and right-pad both to one length;
# not reached while fit_on_texts fails above.
X, y = zip(*sequences)
X = pad_sequences(X, maxlen=max_sequence_len, padding='post')
y = pad_sequences(y, maxlen=max_sequence_len, padding='post')

# Embedding -> LSTM emitting one output per timestep -> softmax over the vocabulary.
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_sequence_len, mask_zero=True),
    LSTM(100, return_sequences=True),
    Dense(vocab_size, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X, y, epochs=50, verbose=1)

def generate_response(input_text):
    """Return the text decoded from the model's prediction for ``input_text``.

    The input is tokenized with the module-level ``tokenizer``, right-padded to
    ``max_sequence_len`` and passed to ``model.predict``. The arg-max index along
    the last axis of the prediction is then converted back to text.
    """
    input_seq = tokenizer.texts_to_sequences([input_text])
    padded_input = pad_sequences(input_seq, maxlen=max_sequence_len, padding='post')
    predicted_output = model.predict(padded_input)
    predicted_word_index = tf.argmax(predicted_output, axis=-1).numpy()
    response = tokenizer.sequences_to_texts(predicted_word_index)
    return response[0]  # one text was submitted, so take the first decoded entry

# Prompt for a line, print the model's reply, repeat; the loop has no exit condition.
while True:
    user_input = input(">>> ")
    response = generate_response(user_input)
    print(f"Chatbot: {response}")

But I get this error:

Traceback (most recent call last):
  File "C:\Users\mceca\Desktop\chatbot.py", line 14, in <module>
    tokenizer.fit_on_texts(conversations)
  File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 293, in fit_on_texts
    seq = text_to_word_sequence(
  File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 74, in text_to_word_sequence
    input_text = input_text.lower()
AttributeError: 'tuple' object has no attribute 'lower'

I have installed tensorflow==2.12.0

I don't want to change my TensorFlow version. My OS is Windows 10 and I don't have a GPU, only a CPU.

I'm working with Keras, but if you know another way to do this in TensorFlow, feel free to suggest it.

How can I fix this error? If there is more than one solution, please list them all.


Solution

  • The error you're encountering occurs because the Tokenizer class from keras.preprocessing.text expects a list of strings (i.e., text samples) but is receiving a list of tuples instead.

    Understanding the Error: The error message "AttributeError: 'tuple' object has no attribute 'lower'" appears because the Tokenizer.fit_on_texts method calls .lower() on each text sample, but it encounters a tuple instead of a string.

    Solution: You need to adjust your code so that you pass a list of text strings rather than tuples. For your chatbot task, you should separate the input and output text into two different lists, then combine them when needed.

    import tensorflow as tf
    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Embedding, LSTM, Dense
    
    # Prepare the conversation data. Prompts and replies are kept as two parallel
    # lists of plain strings because the Tokenizer methods expect str items, not tuples.
    inputs = ["Hello", "How are you?", "What's your name?"]
    responses = ["Hi there!", "I'm doing well, thanks.", "I'm a chatbot."]
    
    # Build one shared vocabulary from the prompts and the replies together
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(inputs + responses)
    
    # Tokenizer numbers words from 1; the extra slot is index 0, used for padding
    vocab_size = len(tokenizer.word_index) + 1
    
    # Convert texts to sequences of word indices
    input_sequences = tokenizer.texts_to_sequences(inputs)
    response_sequences = tokenizer.texts_to_sequences(responses)
    
    # Longest sequence on either side, so X and y are padded to the same length
    max_sequence_len = max(len(seq) for seq in input_sequences + response_sequences)
    
    # Right-pad with zeros; y holds one target word index per timestep
    X = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='post')
    y = pad_sequences(response_sequences, maxlen=max_sequence_len, padding='post')
    
    # Define the model: one softmax over the vocabulary per timestep.
    # mask_zero is deliberately off. With mask_zero=True the mask is derived from
    # the padded *prompt* and is propagated to the loss, so every reply token at a
    # position beyond the prompt's length would be ignored during training (for
    # "Hello" -> "Hi there!" only "hi" would ever be learned), and at prediction
    # time the LSTM would just repeat its last output on the masked steps.
    # Without the mask the network also learns to emit index 0 after the reply
    # ends; sequences_to_texts drops index 0 because it maps to no word.
    model = Sequential([
        Embedding(vocab_size, 64, input_length=max_sequence_len, mask_zero=False),
        LSTM(100, return_sequences=True),
        Dense(vocab_size, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    # Fit the model
    model.fit(X, y, epochs=50, verbose=1)
    
    def generate_response(input_text):
        """Decode the model's per-timestep prediction for ``input_text`` into a string.

        Uses the module-level ``tokenizer``, ``model`` and ``max_sequence_len``:
        the text is encoded, right-padded to ``max_sequence_len``, run through
        ``model.predict``, and the highest-scoring word index at each timestep
        is mapped back to words.
        """
        encoded = pad_sequences(
            tokenizer.texts_to_sequences([input_text]),
            maxlen=max_sequence_len,
            padding='post',
        )
        scores = model.predict(encoded)
        # Only one text was submitted, so keep the first (and only) row of indices
        best_indices = tf.argmax(scores, axis=-1).numpy()[0]
        decoded_texts = tokenizer.sequences_to_texts([best_indices])
        return decoded_texts[0]
    
    # Interaction loop: answer each typed line until the user ends the session
    while True:
        try:
            user_input = input(">>> ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl+C: leave the loop instead of dumping a traceback
            print()
            break
        if not user_input.strip():
            # Nothing to tokenize; prompt again rather than predicting on pure padding
            continue
        response = generate_response(user_input)
        print(f"Chatbot: {response}")