Search code examples
python tensorflow machine-learning

How to classify multiple classes in TensorFlow


I followed a tutorial on YouTube that shows how to classify audio into 2 classes (cough, not cough), but now I need to add an extra class, sneeze, so there are 3 classes to train on (cough, sneeze, other), and I have no idea how to do this. PLEASE HELP!!!

In the code below, the model is trained on 2 classes (cough, not_cough) and performs quite well, but I can't get it to work on multiple classes such as (cough, sneeze, other).

import os
from matplotlib import pyplot as plt
import tensorflow as tf 
import tensorflow_io as tfio
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D, Dropout,TimeDistributed, Reshape
from tensorflow.keras.optimizers.legacy import Adam
from keras import layers
from keras.utils import to_categorical

def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a single-channel 16 kHz waveform.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        A 1-D tensor holding the decoded audio, resampled from the file's
        own sample rate to 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 yields a (samples, 1) tensor; the squeeze below
    # drops that trailing channel axis.
    samples, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    mono = tf.squeeze(samples, axis=-1)
    # The resampler is given the source rate as int64.
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=source_rate, rate_out=16000)

def preprocess(file_path, label):
    """Turn one labelled WAV file into a (spectrogram, label) pair.

    The waveform is cut to at most 8000 samples (0.5 s at 16 kHz); shorter
    clips are padded with zeros at the *front* so every example has the same
    length before the STFT.

    Args:
        file_path: Path of the WAV file.
        label: Label associated with the file; returned unchanged.

    Returns:
        Tuple of the magnitude spectrogram with a trailing channel axis
        added, and the untouched ``label``.
    """
    clip = load_wav_16k_mono(file_path)[:8000]
    # Amount of padding needed to reach exactly 8000 samples.
    front_pad = tf.zeros([8000] - tf.shape(clip), dtype=tf.float32)
    fixed_length = tf.concat([front_pad, clip], 0)

    # Keep only the magnitude of the complex STFT output.
    magnitudes = tf.abs(
        tf.signal.stft(fixed_length, frame_length=100, frame_step=20)
    )
    # Add a channel axis so the result can feed a Conv2D layer.
    return tf.expand_dims(magnitudes, axis=2), label


def get_CNN(input_shape, num_classes=2):
    """Build and compile the spectrogram CNN classifier.

    Args:
        input_shape: Shape of one input example, e.g. ``(396, 65, 1)``.
        num_classes: Number of target classes. With the default of 2 the
            model has a single sigmoid output and is trained with binary
            cross-entropy on 0/1 labels. With more than 2 classes the model
            has ``num_classes`` softmax outputs and is trained with
            categorical cross-entropy, so labels must be one-hot vectors
            (e.g. ``[0, 0, 1]`` for class index 2).

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """
    if num_classes < 2:
        raise ValueError(f"num_classes must be >= 2, got {num_classes}")

    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(16, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    if num_classes == 2:
        # A single-unit softmax always outputs 1.0 and can never learn;
        # a one-unit binary head needs a sigmoid.
        model.add(Dense(1, activation='sigmoid'))
        loss = 'BinaryCrossentropy'
    else:
        # One probability per class; softmax makes them sum to 1.
        model.add(Dense(num_classes, activation='softmax'))
        loss = 'CategoricalCrossentropy'

    model.compile(
        'Adam',
        loss=loss,
        metrics=[
            tf.keras.metrics.Recall(),
            tf.keras.metrics.Precision(),
            'accuracy',
        ],
    )
    # TODO: drop in some more max pool layers to reduce the parameter count.
    model.summary()
    return model
    

def main():
    """Train and validate the cough / not-cough spectrogram classifier.

    Lists the WAV files under ``./data/cough`` and ``./data/not_cough``,
    labels them 1.0 and 0.0 respectively, converts each file to a
    spectrogram with ``preprocess`` and fits the CNN returned by ``get_CNN``
    on a 70/30 train/validation split (measured in batches of 16).

    NOTE: this function only handles the binary case. Training on more
    classes (e.g. cough / sneeze / other) requires one-hot labels and a model
    with one softmax output unit per class.
    """
    POS_COUGH = "./data/cough"
    NEG_COUGH = "./data/not_cough"

    # os.path.join keeps the glob portable: a hard-coded backslash separator
    # only matches on Windows. shuffle=False gives a deterministic listing;
    # the single shuffle below is the only source of randomness.
    pos_cough = tf.data.Dataset.list_files(
        os.path.join(POS_COUGH, '*.wav'), shuffle=False)
    neg_cough = tf.data.Dataset.list_files(
        os.path.join(NEG_COUGH, '*.wav'), shuffle=False)

    cough_labels = tf.data.Dataset.from_tensor_slices(tf.ones(len(pos_cough)))
    # Negatives must be labelled 0; giving both classes the label 1 leaves
    # the model nothing to discriminate.
    not_cough_labels = tf.data.Dataset.from_tensor_slices(
        tf.zeros(len(neg_cough)))

    # Pair each file with its label and join both classes.
    positives = tf.data.Dataset.zip((pos_cough, cough_labels))
    negatives = tf.data.Dataset.zip((neg_cough, not_cough_labels))
    data = positives.concatenate(negatives)

    # Shuffle once over the whole set of cheap (path, label) pairs. The order
    # must NOT change between iterations, otherwise the take/skip split below
    # selects different samples every epoch and validation data leaks into
    # training.
    data = data.shuffle(buffer_size=len(data), reshuffle_each_iteration=False)

    ### 2. Create a Tensorflow Data Pipeline
    data = data.map(preprocess)
    data = data.batch(16)

    ## 3. Split data into train and test data
    train_batches = int(len(data) * 0.7)
    # Cache each split separately so each cache is filled by a complete pass.
    train = data.take(train_batches).cache().prefetch(8)
    # Everything after the training batches is validation data; computing the
    # size as int(len - len * 0.7) could truncate to zero batches.
    test = data.skip(train_batches).cache().prefetch(8)

    input_shape_spectrogram = (396, 65, 1)
    model = get_CNN(input_shape_spectrogram)
    model.fit(train, epochs=2, validation_data=test)

Solution

  • First of all, you need to have 3 classes in your dataset, which means that you need to label the sneeze samples separately, just as you have done for cough/not cough. Then, you need to convert your labels to one-hot encoded vectors, in which all elements are zero except for the element corresponding to the class index. For example, if you consider not cough = 0, cough = 1, and sneeze = 2, a sample with sneeze must be [0, 0, 1], a sample with cough must be [0, 1, 0], and a sample with no cough must be [1, 0, 0]. Finally, your output layer should have 3 neurons, and the loss must change from binary to categorical cross-entropy so that it matches the one-hot labels.

    model.add(Dense(3, activation='softmax'))