python tensorflow keras speech-recognition speech-to-text

tensorflow.python.framework.errors_impl.InvalidArgumentError: Specified a list with shape [60,1] from a tensor with shape [1,1]

I am trying to make a "hot word" detector in Keras (since Snowboy is discontinued). But I can't seem to get my model to make a prediction. Here is my code.

from keras.models import model_from_json
from keras.layers import Dense, LSTM, GRU, Embedding 
from keras.utils import np_utils, to_categorical
from keras.models import Sequential
from keras.losses import CategoricalCrossentropy
import numpy as np

#x = [[[1],[1],[1],[1]],[[1],[1],[1],[1]]]
y = np.asarray([1,1,1,1,1])
x = np.asarray(np.load("pos.npy"))[:10]
print(x.shape)
x = np.append(x,np.asarray(np.load("neg.npy"))[:50],axis=0)
print(x.shape)
y = np.ones(10)
y = np.append(y,np.zeros(50))
print(y.shape)
loss = CategoricalCrossentropy()

#[batch, timesteps, feature]
model = Sequential()
model.add(LSTM(200,batch_input_shape=(None,88200,1),return_sequences=0))
model.add(Dense(1,activation="sigmoid"))
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics="acc")
model.summary()
model.fit(x = x, y=y, epochs=5, batch_size=10)

model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model.h5")
print("Saved model to disk")

And here is how I am trying to predict.

dir = "..."
# load json and create model
json_file = open(dir+'model1.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(dir+"model1.h5")
print("Loaded model from disk")

# evaluate loaded model on test data
loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model = loaded_model
input("record")
data = sd.rec((2*44100),44100,1)
sd.wait()
print(data.shape)
print(len(data))
data = data.reshape((1, 88200,1))
print(data.shape)
a = model.predict_classes(x=data)
print(a)

I have also tried model.predict(data) and I have tried to do it with different shapes. I also have no idea if my model is would work for wake-word recognition or not but it says it gets up to about 90% accuracy on 5 epochs when training. Here is the full error.

UserWarning: `model.predict_classes()` is deprecated and will be removed after 2021-01-01. Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
  warnings.warn('`model.predict_classes()` is deprecated and '
2021-02-20 19:44:40.240690: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
Traceback (most recent call last):
  File "C:/Users/usr/PycharmProjects/wake_words/load.py", line 24, in <module>
    a = model.predict_classes(x=data)
  File "C:\Users\usr\Desktop\anaconda1\lib\site-packages\tensorflow\python\keras\engine\sequential.py", line 459, in predict_classes
    proba = self.predict(x, batch_size=batch_size, verbose=verbose)
  File "C:\Users\us\Desktop\anaconda1\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1629, in predict
    tmp_batch_outputs = self.predict_function(iterator)
  File "C:\Users\usr\Desktop\anaconda1\lib\site-packages\tensorflow\python\eager\def_function.py", line 828, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\usr\Desktop\anaconda1\lib\site-packages\tensorflow\python\eager\def_function.py", line 895, in _call
    filtered_flat_args, self._concrete_stateful_fn.captured_inputs)  # pylint: disable=protected-access
  File "C:\Users\usr\Desktop\anaconda1\lib\site-packages\tensorflow\python\eager\function.py", line 1919, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager))
  File "C:\Users\usr\Desktop\anaconda1\lib\site-packages\tensorflow\python\eager\function.py", line 560, in call
    ctx=ctx)
  File "C:\Users\usr\Desktop\anaconda1\lib\site-packages\tensorflow\python\eager\execute.py", line 60, in quick_execute
    inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError:    Specified a list with shape [60,1] from a tensor with shape [1,1]
     [[{{node TensorArrayUnstack/TensorListFromTensor}}]]
     [[sequential_8/lstm_8/PartitionedCall]] [Op:__inference_predict_function_1035]

Function call stack:
predict_function -> predict_function -> predict_function

Here is the code that creates the "pos.npy" and "neg.npy". Its just recording with sounddevice and saving those recordings.

import sounddevice as sd
from time import sleep
import numpy as np
# define data
positive_samples = 15
negative = 100
time = 2
p = []
n = []

print("say the word")
for i in range(positive_samples):
    input(i)
    data = sd.rec((time*44100),44100,1)
    sd.wait()
    p.append(data)
print("don't say the word")
for i in range(negative):
    print(i)
    data = sd.rec((time*44100),44100,1)
    sd.wait()
    n.append(data)
# save to csv file
np.save("pos.npy",p)
np.save("neg.npy",n)

data = np.load("pos.npy")

EDIT 1: I got model.predict(data) to work if I have it predict on the first 32 samples but it won't let me do it on any other amount of samples. Here is what I have:

import sounddevice as sd
import numpy as np
from tensorflow.keras.models import model_from_json
dir = "C:\\Users\\usr\\Desktop\\folder\\"
# load json and create model
json_file = open(dir+'model1.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(dir+"model1.h5")
print("Loaded model from disk")

# evaluate loaded model on test data
loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model = loaded_model
#get the first 32 samples
data = np.load("neg.npy")[:32]
a = model.predict(x=data)
print(a)

Solution

The problem was that the model.predict(data) function wanted my data to have the same shape as the number of the batch_size. There are two ways to fix this. 1. Change the batch size in your model to 1. 2 (and this is the best option). model.predict(data,batch_size=1)