Tags: python, tensorflow, keras

'utf-8' codec can't decode byte 0xfc: invalid start byte - load_model (Keras)


So I have been trying to create a simple chatbot. Here is the training script, training.py:

import random
import json
import pickle
import numpy as np
import tensorflow as tf
import nltk
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

intents = json.loads(open('intents.json').read())

words = []
classes = []
documents = []
ignoreLetters = ['?', '!', '.', ',']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        wordList = nltk.word_tokenize(pattern)
        words.extend(wordList)
        documents.append((wordList, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

words = [lemmatizer.lemmatize(word) for word in words if word not in ignoreLetters]
words = sorted(set(words))

classes = sorted(set(classes))

pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

training = []
outputEmpty = [0] * len(classes)

for document in documents:
    bag = []
    wordPatterns = document[0]
    wordPatterns = [lemmatizer.lemmatize(word.lower()) for word in wordPatterns]
    for word in words:
        bag.append(1 if word in wordPatterns else 0)

    outputRow = list(outputEmpty)
    outputRow[classes.index(document[1])] = 1
    training.append(bag + outputRow)

random.shuffle(training)
training = np.array(training)

trainX = training[:, :len(words)]
trainY = training[:, len(words):]

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, input_shape=(len(trainX[0]),), activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(64, activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(len(trainY[0]), activation='softmax'))

sgd = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(trainX, trainY, epochs=200, batch_size=5, verbose=1)
model.save('chatbot.h5')
print('Done')
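
For reference, training.py (and main.py below) read intents.json. It has roughly this shape - a minimal illustrative example of the keys the code expects ("intents", "tag", "patterns", "responses"), not my full file:

{
    "intents": [
        {
            "tag": "greeting",
            "patterns": ["Hi", "Hello", "Hey there"],
            "responses": ["Hello!", "Hi, how can I help?"]
        }
    ]
}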

And here is main.py, the script that runs the chatbot:

import random
import json
import pickle
import numpy as np
import tensorflow as tf
import nltk
from nltk.stem import WordNetLemmatizer
from keras.models import load_model

lemmatizer = WordNetLemmatizer()
intents = json.loads(open('intents.json').read())

words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
model = load_model('chatbot.h5')

def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [lemmatizer.lemmatize(word) for word in sentence_words]
    return sentence_words

def bag_of_words(sentence):
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for w in sentence_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1
    return np.array(bag)

def predict_class(sentence):
    bow = bag_of_words(sentence)
    res = model.predict(np.array([bow]))[0]
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]

    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({'intent': classes[r[0]], 'probability': str(r[1])})
    return return_list

def get_response(intents_list, intents_json):
    tag = intents_list[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if i['tag'] == tag:
            result = random.choice(i['responses'])
            break
    return result

print("Go! Bot is running!")

while True: 
    message = input("")
    ints = predict_class(message)
    res = get_response(ints, intents)
    print(res)

When I try to execute main.py, this error pops up:

C:/Users/User/AppData/Local/Programs/Python/Python311/python.exe "c:/Users/User/source/repos/ai/main.py"
Traceback (most recent call last):
  File "c:\Users\User\source\repos\ai\main.py", line 15, in <module>
    model = load_model('chatbot.h5')
    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\saving\saving_api.py", line 212, in load_model
    return legacy_sm_saving_lib.load_model(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow\python\lib\io\file_io.py", line 703, in is_directory_v2
    return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xfc in position 31: invalid start byte

training.py works as expected and saves the words and classes to .pkl files, but I just can't get load_model to work. I have seen others with a similar problem, but their fixes didn't work for me.


Solution

  • Here are the changes I made, and it worked for me! In training.py, save the model explicitly in the HDF5 format:

    model.save('chatbot.h5', save_format='h5')
    

    And here is the corresponding change in main.py:

    model = tf.keras.models.load_model('chatbot.h5', compile=True)
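
    If you want to double-check that the saved file really is in the HDF5 format, a quick optional sanity check (assuming the model was saved as chatbot.h5 in the working directory) is to look at the file's signature bytes:

    # A genuine HDF5 file starts with the 8-byte signature b'\x89HDF\r\n\x1a\n'.
    # If this prints False, the save did not actually produce an HDF5 file.
    with open('chatbot.h5', 'rb') as f:
        print(f.read(8) == b'\x89HDF\r\n\x1a\n')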
    

    I hope this helps you!