Search code examples
pythontensorflowmachine-learningcomputer-science

How to input user images to predict with Tensorflow?


For my project, I am using tensorflow to predict handwritten user input.

Basically I used this dataset: https://www.kaggle.com/rishianand/devanagari-character-set, and created a model. I used matplotlib to see the images that were being produced by the pixels.

My code essentially works with the training data, but I want to step it up a little. Using cv2, I created a GUI that allows users to draw a Nepali letter. After this, I have branching that tells the program to save the image on the computer.

This is a snippet of my code for it:

# Event loop: keep showing the canvas and react to key presses until the user quits.
#   'c' clears the canvas, 's' saves and shrinks the drawing, 'q' exits.
while True:
    cv2.imshow('window', win)  # imshow returns None, so there is nothing to keep
    k = cv2.waitKey(1)
    if k == ord('c'):
        win = np.zeros((500, 500, 3), dtype='float64')  # start over with a blank image
    # save the drawing to disk so it can be resized
    if k == ord('s'):
        cv2.imwrite("nepali_character.jpg", win)
        img = cv2.imread("nepali_character.jpg")
        cv2.imshow('char', img)
        # resize the saved image with Pillow to the dataset's 32x32 format
        size = (32, 32)
        # TODO: create a while loop (make the user print stuff until they print something that STOPS it)
        im = Image.open("nepali_character.jpg")
        out = im.resize(size)
        out.save('resized.jpg')  # save() returns None; its result is not useful
        # Read the resized file once: the default imread flag is already
        # colour (1), so the preview and the pixel matrix are the same array.
        imgout = cv2.imread('resized.jpg')
        cv2.imshow("out", imgout)
        # the pixels of the resized image, printed as a matrix
        pix = imgout
        print(pix)
    if k == ord('q'):  # 'q' closes all windows and leaves the loop
        cv2.destroyAllWindows()
        break

I resize the image, because those were the dimensions of the data from the dataset.

Now my question is HOW do I predict what that letter is through tensorflow.

When I asked my teacher about it, he said to put it in my data file, and then treat it as a training data, and then look at the weights, and pick the greatest weight?

But I'm confused about how I can put this image into that data file.

If anyone has any suggestions of how to take user input and then predict, that would be greatly appreciated


Solution

  • Understand the dataset:

    1. the size of the image is 32 x 32
    2. there are 46 different characters/alphabets
    ['character_10_yna', 'character_11_taamatar', 'character_12_thaa', 'character_13_daa', 'character_14_dhaa', 'character_15_adna', 'character_16_tabala', 'character_17_tha', 'character_18_da', 'character_19_dha', 'character_1_ka', 'character_20_na', 'character_21_pa', 
    'character_22_pha', 'character_23_ba', 'character_24_bha', 'character_25_ma',
     'character_26_yaw', 'character_27_ra', 'character_28_la', 'character_29_waw', 'character_2_kha', 'character_30_motosaw', 'character_31_petchiryakha', 'character_32_patalosaw', 'character_33_ha', 'character_34_chhya', 
    'character_35_tra', 'character_36_gya', 'character_3_ga', 'character_4_gha', 'character_5_kna', 'character_6_cha', 'character_7_chha', 'character_8_ja', 
    'character_9_jha', 'digit_0', 'digit_1', 'digit_2', 'digit_3', 'digit_4', 'digit_5', 'digit_6', 'digit_7', 'digit_8', 'digit_9']
    

    As your images are categorized into per-class folders inside the Train folder,

    so keras implementation will be:

    import matplotlib.pyplot as plt
    import numpy as np
    import os
    import PIL
    import tensorflow as tf
    
    from tensorflow import keras
    from tensorflow.keras import layers
    from tensorflow.keras.models import Sequential
    import pathlib
    # Root of the training split: one sub-folder per class, PNG images inside.
    dataDir = "/xx/xx/xx/xx/datasets/Devanagari/drive-download-20210601T224146Z-001/Train"
    # The data already lives on disk, so point at it directly.
    # keras.utils.get_file is a download/cache helper and is not needed here.
    data_dir = pathlib.Path(dataDir)
    image_count = len(list(data_dir.glob('*/*.png')))
    print(image_count)
    batch_size = 32
    # The images are 32x32 on disk; they are upscaled to this size when loaded.
    img_height = 180
    img_width = 180

    # Options shared by both subsets. The seed and validation_split must be
    # identical for the two calls so the training and validation sets do not
    # overlap.
    split_options = dict(
      validation_split=0.2,
      seed=123,
      image_size=(img_height, img_width),
      batch_size=batch_size,
    )

    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
      data_dir,
      subset="training",
      **split_options)

    val_ds = tf.keras.preprocessing.image_dataset_from_directory(
      data_dir,
      subset="validation",
      **split_options)
    # Class names come from the sub-folder names; label i maps to class_names[i].
    class_names = train_ds.class_names
    print(class_names) # 46 classes
    

    For caching and normalization, refer to the TensorFlow tutorial:

    # Let tf.data pick the prefetch buffer size.
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # Training data: cache, shuffle with a 1000-element buffer, then prefetch.
    train_ds = train_ds.cache()
    train_ds = train_ds.shuffle(1000)
    train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
    # Validation data: cache and prefetch only (no shuffling).
    val_ds = val_ds.cache()
    val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

    # Demonstration of rescaling pixel values by 1/255: apply the layer to the
    # images of each batch and print the min/max of the first rescaled image.
    normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)

    def _rescale(images, labels):
        # Rescale the images and pass the labels through untouched.
        return normalization_layer(images), labels

    normalized_ds = train_ds.map(_rescale)
    image_batch, labels_batch = next(iter(normalized_ds))
    first_image = image_batch[0]
    print(np.min(first_image), np.max(first_image))
    

    Model setup, compilation, and training:

    # One output unit per class. Derive the count from the class names found
    # on disk (46 for this dataset) so it cannot drift from the labels.
    num_classes = len(class_names)

    # Small CNN: rescale pixels by 1/255, three conv + max-pool stages, then a
    # dense head. The last layer has no activation, so it outputs raw logits.
    model = Sequential([
      layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
      layers.Conv2D(16, 3, padding='same', activation='relu'),
      layers.MaxPooling2D(),
      layers.Conv2D(32, 3, padding='same', activation='relu'),
      layers.MaxPooling2D(),
      layers.Conv2D(64, 3, padding='same', activation='relu'),
      layers.MaxPooling2D(),
      layers.Flatten(),
      layers.Dense(128, activation='relu'),
      layers.Dense(num_classes)
    ])

    # from_logits=True matches the activation-free output layer above; labels
    # are integer class indices, hence the sparse loss.
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    epochs=10
    history = model.fit(
      train_ds,
      validation_data=val_ds,
      epochs=epochs
    )
    
    

    This will produce the following result (very promising!):

    Epoch 10/10
    1955/1955 [==============================] - 924s 472ms/step - loss: 0.0201 - accuracy: 0.9932 - val_loss: 0.2267 - val_accuracy: 0.9504
    

    Save the model (this will take time to train, so better save the model)

    # Create the output folder from Python; `!mkdir` is a notebook-only shell
    # escape and is a syntax error in a regular script.
    os.makedirs('saved_model', exist_ok=True)
    model.save('saved_model/my_model')
    

    load the model:

    # Restore the model from the directory it was saved to.
    saved_model_path = 'saved_model/my_model'
    loaded_model = tf.keras.models.load_model(saved_model_path)
    # Print the layer summary to confirm the architecture was restored.
    loaded_model.summary()
    

    Now the final task: get the prediction. One way is as follows:

    import cv2
    image_path = 'datasets/Devanagari/drive-download-20210601T224146Z-001/Test/character_3_ga/3711.png'
    im2 = cv2.imread(image_path)
    # cv2.imread does not raise on a bad path; it returns None, which would
    # only surface later as a confusing error inside cv2.resize.
    if im2 is None:
        raise FileNotFoundError("could not read image: {}".format(image_path))
    # OpenCV loads channels as BGR, but the training pipeline fed the model
    # RGB images, so reorder the channels to match.
    im2 = cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)
    # resize to the size the model was trained on; cv2.resize takes (width, height)
    im2 = cv2.resize(im2, (img_width, img_height))
    print(im2.shape)
    img2 = tf.expand_dims(im2, 0) # add a batch axis: (180, 180, 3) -> (1, 180, 180, 3)
    print(img2.shape)

    predictions = loaded_model.predict(img2)
    score = tf.nn.softmax(predictions[0]) # turn the logits into per-class probabilities

    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    ) # np.argmax gives the index where the probability is highest, in this case 29. This is what
    # your instructor meant by picking the "greatest weight"
    
    (180, 180, 3)
    (1, 180, 180, 3)
    This image most likely belongs to character_3_ga with a 100.00 percent confidence.
    

    Another way is online, which is what you are trying to achieve. The image shape needs to be (1, 180, 180, 3) for this example, or it can be (1, 32, 32, 3) if no resizing was done. Then feed it to predict, something like below:

    # `im` is the PIL image of the user's drawing. Resize it to the size the
    # model was trained on (PIL's resize takes (width, height)) and force three
    # channels so the array has shape (img_height, img_width, 3).
    out = im.convert('RGB').resize((img_width, img_height))
    # Convert to a NumPy array explicitly, then add the batch axis.
    out = tf.expand_dims(np.asarray(out), 0)
    predictions = loaded_model.predict(out)
    score = tf.nn.softmax(predictions[0]) # turn the logits into per-class probabilities

    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    )