I followed a YouTube tutorial that shows how to train a classifier on two classes (cough, not cough). Now I need to add an extra class, sneeze, so that the model is trained on three classes (cough, sneeze, other), and I have no idea how to do this. Any help would be appreciated!
In the code below, the model is trained on two classes (cough, not_cough) and performs quite well, but I can't get it to work on multiple classes such as (cough, sneeze, other).
import os
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D, Dropout,TimeDistributed, Reshape
from tensorflow.keras.optimizers.legacy import Adam
from keras import layers
from keras.utils import to_categorical
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a single-channel waveform at 16 kHz.

    Args:
        filename: Path of the WAV file to load.

    Returns:
        The decoded audio with the channel axis removed, resampled from the
        file's own sample rate to 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for exactly one channel.
    audio, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Drop the trailing channel axis so the waveform is one-dimensional.
    mono = tf.squeeze(audio, axis=-1)
    # The resampler is given the source rate as an int64 tensor.
    return tfio.audio.resample(
        mono,
        rate_in=tf.cast(source_rate, dtype=tf.int64),
        rate_out=16000,
    )
def preprocess(file_path, label):
    """Turn one audio file into a fixed-size magnitude spectrogram.

    Args:
        file_path: Path of the WAV file to load.
        label: Label associated with the file; passed through unchanged.

    Returns:
        A ``(spectrogram, label)`` pair, where the spectrogram carries a
        trailing axis of size 1 added for the convolutional layers.
    """
    # Keep at most the first 8000 samples of the clip.
    clip = load_wav_16k_mono(file_path)[:8000]
    # Shorter clips are brought up to 8000 samples with zeros placed
    # in front of the audio.
    leading_zeros = tf.zeros([8000] - tf.shape(clip), dtype=tf.float32)
    padded = tf.concat([leading_zeros, clip], 0)
    # Magnitude of the short-time Fourier transform.
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=100, frame_step=20))
    return tf.expand_dims(magnitudes, axis=2), label
def get_CNN(input_shape, num_classes=2):
    """Build and compile a small convolutional classifier.

    Args:
        input_shape: Shape of one input sample, e.g. ``(396, 65, 1)``.
        num_classes: Number of target classes. With the default of 2 the
            model has a single sigmoid output and expects 0/1 labels. With
            3 or more it has one softmax output per class and expects
            one-hot encoded labels.

    Returns:
        The compiled Keras model.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """
    if num_classes < 2:
        raise ValueError("num_classes must be at least 2")

    if num_classes == 2:
        # A softmax over a single unit always outputs 1.0, so the binary
        # head has to use a sigmoid to produce a usable probability.
        output_units = 1
        output_activation = 'sigmoid'
        loss = 'binary_crossentropy'
    else:
        output_units = num_classes
        output_activation = 'softmax'
        loss = 'categorical_crossentropy'

    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(16, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(output_units, activation=output_activation))
    model.compile(
        'Adam',
        loss=loss,
        metrics=[tf.keras.metrics.Recall(), tf.keras.metrics.Precision(), 'accuracy'],
    )
    # NOTE: adding more max-pool layers would reduce the parameter count.
    model.summary()
    return model
def main():
    """Train the cough / not-cough classifier on the WAV files under ./data.

    Files in ``./data/cough`` are labelled 1 and files in
    ``./data/not_cough`` are labelled 0. The samples are shuffled once,
    converted to spectrograms, batched, and split 70/30 into training and
    validation batches before the model is fitted.
    """
    POS_COUGH = "./data/cough"
    NEG_COUGH = "./data/not_cough"

    # shuffle=False keeps the file order stable so that the single seeded
    # shuffle below fully determines which samples land in which split.
    pos_cough = tf.data.Dataset.list_files(
        os.path.join(POS_COUGH, '*.wav'), shuffle=False)
    neg_cough = tf.data.Dataset.list_files(
        os.path.join(NEG_COUGH, '*.wav'), shuffle=False)

    # Positive samples get label 1, negative samples label 0.
    cough_labels = tf.data.Dataset.from_tensor_slices(tf.ones(len(pos_cough)))
    not_cough_labels = tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg_cough)))

    # Attach labels and combine positive and negative samples.
    positives = tf.data.Dataset.zip((pos_cough, cough_labels))
    negatives = tf.data.Dataset.zip((neg_cough, not_cough_labels))
    data = positives.concatenate(negatives)

    # Mix the two classes once, with a buffer covering every sample.
    # The order is fixed (seeded, no per-epoch reshuffle) so the train/test
    # split below selects the same samples on every epoch.
    data = data.shuffle(
        buffer_size=len(data), seed=42, reshuffle_each_iteration=False)

    # Build the input pipeline: spectrograms in batches of 16.
    data = data.map(preprocess)
    data = data.batch(16)

    # Split the batches 70/30 into training and validation data.
    num_train_batches = int(len(data) * 0.7)
    train = data.take(num_train_batches).cache()
    # Only the training batches are reshuffled between epochs.
    train = train.shuffle(buffer_size=1000).prefetch(8)
    test = data.skip(num_train_batches).cache().prefetch(8)

    # 8000 samples with frame_length=100 and frame_step=20 give 396 frames;
    # each frame has 65 frequency bins.
    input_shape_spectrogram = (396, 65, 1)
    model = get_CNN(input_shape_spectrogram)
    model.fit(train, epochs=2, validation_data=test)
First of all, you need to have 3 classes in your dataset, which means that you need to label the sneeze samples separately, just as you have done for cough / not cough. Then, you need to convert your labels to one-hot encoded vectors, in which all elements are zero except for the element corresponding to the class index. For example, if you take not cough = 0, cough = 1, and sneeze = 2, a sneeze sample must be labelled [0, 0, 1], a cough sample [0, 1, 0], and a not-cough sample [1, 0, 0]. With one-hot labels, the loss should also be changed from binary cross-entropy to categorical cross-entropy. Finally, your output layer should have 3 neurons:
# Output layer for the three-class case: three units with a softmax activation.
model.add(Dense(3, activation='softmax'))