Search code examples
pythontensorflowkerasartificial-intelligencechatbot

Input incompatible with the layer for chatbot prediction model


this is my traning model, I pretty much follow this tutorial https://www.youtube.com/watch?v=1lwddP0KUEg except I have to use stanza package for spanish version:

import random
import json
import pickle
import numpy as np
import pandas as pd

import nltk
from nltk.tokenize.toktok import ToktokTokenizer
import stanza

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

toktok = ToktokTokenizer()

intents = json.loads(open("intents.json", "rb").read())

words = []
classes = []
documents = []
ignore_letters = ['¿','?', '!', '.', ',']
nlp = stanza.Pipeline('es')

for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = toktok.tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

words = [nlp(word).to_dict() for word in words if word not in ignore_letters]  

palabras = []
for i in range(0,len(words)):    
    texto = words[i][0][0]["lemma"]
    palabras.append(texto)
    palabras = sorted(set(palabras))
    
classes = sorted(set(classes))

pickle.dump(palabras, open('palabras.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

training = []
output_empty = [0] * len(classes)

for document in documents:
    bag = []
    word_patterns = document[0]
    word_patterns = [nlp(word.lower()).to_dict() for word in word_patterns]
    for word in words:
        bag.append(1) if word in word_patterns else bag.append(0)
        
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])
    
random.shuffle(training)
training = np.array(training, dtype="object")

train_x = list(training[:, 0])
train_y = list(training[:, 1])

model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
model.save('chatbotmodel.h5', hist)
print('Listo')

The problem is that when I try to use the model to predict the class with the following code:

import random
import json
import pickle
import numpy as np

import nltk
from nltk.tokenize.toktok import ToktokTokenizer
import stanza

from tensorflow.keras.models import load_model

toktok = ToktokTokenizer()

intents = json.loads(open('intents.json').read())

words = pickle.load(open('palabras.pkl','rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
model = load_model('chatbotmodel.h5')
nlp = stanza.Pipeline('es')

def clean_up_sentence(sentence):
    sentence_words = toktok.tokenize(sentence)
    words = [nlp(word).to_dict() for word in sentence_words]  
    palabras = []
    for i in range(0,len(words)):    
        texto = words[i][0][0]["lemma"]
        palabras.append(texto)
        palabras = sorted(set(palabras))
    
    return palabras

def bag_of_words(sentence):
    sentences_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for w in sentences_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1
    
    return np.array(bag)

def predict_class(sentence):
    bow = bag_of_words(sentence)
    res = model.predict(np.array([bow]))[0]
    
    return res

predict_class('Hola, ¿cómo te va?)

Give me this error:

ValueError: Input 0 of layer sequential_1 is incompatible with the layer: expected axis -1 of input shape to have value 15 but received input with shape (None, 13)

Any idea of what it's wrong?, because I have followed the tutorial pretty much to the step. The intents.json data for trainning is very simple:

{"intents": [
   {"tag": "respuestaSaludo",
    "patterns": ["bien","¿quien es?","¿qué desea?"],
    "responses": ["Mi nombre es Juan Carlos Bellido", "Soy Juan"]
   },
   {"tag": "respuestaPropuesta",
    "patterns": ["no","no él gracias","no por ahora, aquí","de nuevo"],
    "responses": ["ok, gracias","ok"]
   }
]}

EDIT 1: model.summary():

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 128)               2048      
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 130       
=================================================================
Total params: 10,434
Trainable params: 10,434
Non-trainable params: 0
_________________________________________________________________

bow.shape:

def predict_class(sentence):
    bow = bag_of_words(sentence)

    return print(bow.shape)

predict_class('quiero ver que sucede')

Output:

(13,)

Solution

  • As far as I understand - your model is expecting sequence length of 15. You are feeding 13 tokens.

    Try to append your sequence with two zeros:

    bow = bag_of_words(sentence)
    bow = np.concatenate((bow, np.array([0, 0])), -1)
    res = model.predict(np.array([bow]))[0]