Search code examples
pythonnumpymachine-learningneural-networkperceptron

How can I fix my perceptron to recognize numbers?


My exercise is to train 10 perceptrons to recognize numbers (0 - 9). Each perceptron should learn a single digit. As training data, I've created 30 images (5x7 bmp). 3 variants per digit.

I've got a perceptron class:

import numpy as np


def unit_step_func(x):
    """Step activation: 1 where ``x`` is strictly positive, 0 otherwise.

    Applied element-wise, so it accepts scalars as well as arrays.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)


def sigmoid(x):
    """Logistic function ``1 / (1 + e**-x)``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


class Perceptron:
    """Single-layer perceptron for binary (0/1) classification.

    Training uses the classic perceptron rule: for each sample the weights
    move by ``lr * (target - prediction) * x`` and the bias by
    ``lr * (target - prediction)``.
    """

    def __init__(self, learning_rate=0.01, n_iters=1000):
        """Store the hyper-parameters; weights and bias are created by ``fit``.

        Args:
            learning_rate: Step size applied to every update.
            n_iters: Maximum number of passes over the training data.
        """
        self.lr = learning_rate
        self.n_iters = n_iters
        self.activation_func = unit_step_func
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn weights and bias from the training data.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: Sequence of ``n_samples`` target values (0 or 1).
        """
        # Accept plain lists as well as arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        _, n_features = X.shape

        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            changed = False
            for x_i, y_i in zip(X, y):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation_func(linear_output)

                # Zero when the sample is classified correctly.
                update = self.lr * (y_i - y_predicted)
                self.weights += update * x_i
                self.bias += update
                if np.any(update != 0):
                    changed = True

            # A full pass without any update means the parameters can no
            # longer change, so the remaining passes would be no-ops.
            if not changed:
                break

    def predict(self, X):
        """Return the activation output for one sample or a batch of samples.

        Args:
            X: A single feature vector or a 2-D array of feature vectors.

        Returns:
            The result of ``activation_func`` applied to ``X . weights + bias``.
        """
        linear_output = np.dot(X, self.weights) + self.bias
        return self.activation_func(linear_output)

I've tried both activation functions (unit_step_func and sigmoid) as well as the pocket algorithm, to see if there's any difference. I'm a noob, so I'm not sure if this is even implemented correctly.

This is how I train these perceptrons:

import numpy as np
from PIL import Image
from Perceptron import Perceptron
import os

def load_images_from_folder(folder, digit):
    """Load every image in ``folder`` as a flat vector with a one-vs-rest label.

    Args:
        folder: Directory containing the training images.
        digit: Digit treated as the positive class. Files whose name starts
            with ``"<digit>_"`` are labelled 1, all other files 0.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per file,
        ordered by file name.
    """
    images = []
    labels = []
    positive_prefix = f"{digit}_"
    # os.listdir() returns names in arbitrary order; sort them so the sample
    # order (which per-sample training is sensitive to) is reproducible.
    for filename in sorted(os.listdir(folder)):
        # Image.open() raises on failure rather than returning None, so no
        # None check is needed; the context manager closes the file handle.
        with Image.open(os.path.join(folder, filename)) as img:
            # NOTE(review): pixel values are used exactly as stored in the
            # file - confirm they use the same encoding (which value means
            # "ink") as the vectors later passed to predict().
            images.append(np.array(img).flatten())
        labels.append(1 if filename.startswith(positive_prefix) else 0)
    return np.array(images), np.array(labels)


# The ten digits; one perceptron is trained per digit.
digits_to_recognize = list(range(10))

# perceptrons[i] is the one-vs-rest classifier for digits_to_recognize[i].
perceptrons = []
for digit_to_recognize in digits_to_recognize:
    p = Perceptron()
    X, y = load_images_from_folder("data", digit_to_recognize)
    p.fit(X, y)
    perceptrons.append(p)

in short:

training data filename is in the format digit_variant. As I said before, each digit has 3 variants,

so for digit 0 it is 0_0, 0_1, 0_2,

for digit 1 it's: 1_0, 1_1, 1_2,

and so on...

The load_images_from_folder function loads all 30 images and checks each file name. If the digit part of the name is the same as the digit argument, it appends 1 to labels (otherwise 0), so that the perceptron knows which images show the desired digit.

I know that it'd be better to load these images once and save them in some array of tuples, for example, but I don't care about the performance right now (I won't care later either).

for digit 0 labels array is [1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]

for digit 1 labels array is [0,0,0, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]

and so on...

then I train 10 perceptrons using this data.

This exercise also requires some kind of GUI that allows me to draw a number. I've chosen pygame; I could have used PyQt, it does not actually matter.

This is the code; you can skip it, it's not that important (except for the on_rec_button function, which I'll come back to):

import pygame
import sys

pygame.init()

# Drawing grid: 5 columns by 7 rows of square cells, each square_size pixels wide.
cols, rows = 5, 7
square_size = 50
# The window holds the grid plus two extra cell-high rows, one per button.
width, height = cols * square_size, (rows + 2) * square_size
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Zad1")

# Green "recognize" button in the row directly below the grid.
rec_button_color = (0, 255, 0)
rec_button_rect = pygame.Rect(0, rows * square_size, width, square_size)

# Yellow "clear" button in the last row, shifted down by one pixel.
clear_button_color = (255, 255, 0)
clear_button_rect = pygame.Rect(0, (rows + 1) * square_size + 1, width, square_size)

# True while mouse button 3 is held down (maintained by the event loop).
mouse_pressed = False

# One entry per grid cell: 1 = coloured, 0 = empty.
drawing_matrix = np.zeros((rows, cols), dtype=int)


def color_square(x, y):
    """Mark the grid cell under window pixel ``(x, y)`` as coloured.

    Positions that fall outside the grid are ignored.
    """
    row, col = y // square_size, x // square_size

    if not (0 <= row < rows) or not (0 <= col < cols):
        return
    drawing_matrix[row, col] = 1


def draw_button(color, rect):
    """Draw ``rect`` on the global ``screen`` in the given ``color``."""
    pygame.draw.rect(screen, color, rect)


def on_rec_button():
    """Run every digit's perceptron on the current drawing and print matches.

    Each perceptron is a binary "is this my digit?" classifier, so zero, one
    or several digits may be reported for a single drawing.
    """
    np_array_representation = drawing_matrix.flatten()

    # perceptrons was filled in the same order as digits_to_recognize, so the
    # two sequences can be walked in lockstep.
    for digit_to_recognize, p in zip(digits_to_recognize, perceptrons):
        is_match = p.predict(np_array_representation)
        # predict() yields 1 ("this is my digit") or 0 ("it is not"), never
        # the digit itself. Comparing it with the digit value made digit 0
        # "match" whenever its perceptron answered 0, and made digits 2-9
        # impossible to match.
        if is_match == 1:
            print(f"Image has been recognized as number {digit_to_recognize}")


def on_clear_button():
    """Erase the drawing by resetting every grid cell to 0 in place."""
    drawing_matrix[...] = 0


# Main loop: process pending input events, then redraw the grid and buttons.
while True:
    for event in pygame.event.get():
        is_button_down = event.type == pygame.MOUSEBUTTONDOWN
        is_button_up = event.type == pygame.MOUSEBUTTONUP

        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()

        # Button 3 toggles drawing mode while it is held down.
        elif is_button_down and event.button == 3:
            mouse_pressed = True

        elif is_button_up and event.button == 3:
            mouse_pressed = False

        # Cells are coloured only while the mouse moves with button 3 held.
        elif event.type == pygame.MOUSEMOTION:
            mouse_x, mouse_y = event.pos
            if mouse_pressed:
                color_square(mouse_x, mouse_y)

        # Button 1 clicks activate whichever button lies under the cursor.
        elif is_button_down and event.button == 1:
            click_pos = event.pos
            if rec_button_rect.collidepoint(click_pos):
                on_rec_button()
            if clear_button_rect.collidepoint(click_pos):
                on_clear_button()

    # Repaint every cell: red when coloured, black otherwise.
    for (i, j), cell in np.ndenumerate(drawing_matrix):
        cell_color = (255, 0, 0) if cell == 1 else (0, 0, 0)
        cell_rect = (j * square_size, i * square_size, square_size, square_size)
        pygame.draw.rect(screen, cell_color, cell_rect)

    draw_button(rec_button_color, rec_button_rect)
    draw_button(clear_button_color, clear_button_rect)

    pygame.display.flip()

So, when I run the app, draw the digit 3, and click the green button that runs the on_rec_button function, I expect to see "Image has been recognized as number 3", but I get "Image has been recognized as number 0".

This is what I draw:

enter image description here

These are training data:

enter image description here enter image description here enter image description here

These are very small because of the resolution 5x7 that was required in the exercise.

When I draw the digit 1, I get two results: "Image has been recognized as number 0" and "Image has been recognized as number 1".

enter image description here

What should I do to make it work the way I want? I don't expect this to work 100% accurate but I guess it could be better.


Solution

  • There seem to be a few issues in the code; I will try to address them:

    • It's missing the backpropagation function derivatives, as mentioned in the comments! Those are very important because they are what guides each correction in the right direction (based on the gradient).
    • Similarly, the bias is not calculated correctly.

    Here is a working code:

    
    def sigmoid(x):
        """Logistic function ``1 / (1 + e**-x)``, applied element-wise."""
        exp_neg = np.exp(-x)
        return 1 / (1 + exp_neg)
    
    def sigmoid_derivative(x):
        """Sigmoid derivative expressed through the sigmoid's own output.

        ``x`` is expected to already be a sigmoid output ``s``; the result is
        ``s * (1 - s)``.
        """
        one_minus_x = 1 - x
        return x * one_minus_x
    
    class Perceptron:
        """Single sigmoid neuron trained with per-sample gradient updates."""

        def __init__(self, learning_rate=0.01, n_iters=1000):
            """Store the hyper-parameters; weights and bias are created by ``fit``.

            Args:
                learning_rate: Step size applied to every update.
                n_iters: Number of passes over the training data.
            """
            self.lr = learning_rate
            self.n_iters = n_iters
            self.weights = None
            self.bias = None

        def fit(self, X, y):
            """Learn weights and bias from the training data.

            Args:
                X: 2-D array of shape ``(n_samples, n_features)``.
                y: Sequence of ``n_samples`` target values (0 or 1).
            """
            _, n_features = X.shape
            self.bias = 0
            self.weights = np.zeros(n_features)
            for _ in range(self.n_iters):
                for x_i, y_i in zip(X, y):
                    linear_output = np.dot(x_i, self.weights) + self.bias
                    y_predicted = sigmoid(linear_output)

                    error = y_i - y_predicted

                    # Scale the error by the sigmoid slope at the current
                    # output; sigmoid_derivative takes the sigmoid output.
                    output_error = error * sigmoid_derivative(y_predicted)

                    # output_error is a scalar for a single sample, so plain
                    # multiplication is all that is needed here.
                    self.weights += x_i * output_error * self.lr
                    self.bias += output_error * self.lr

        def predict(self, X):
            """Return the sigmoid output (a value between 0 and 1) for ``X``.

            Args:
                X: A single feature vector or a 2-D array of feature vectors.
            """
            linear_output = np.dot(X, self.weights) + self.bias
            return sigmoid(linear_output)
    

    As the main question is about the perceptron, I preferred to skip the pygame code. I used from keras.datasets import mnist to mock the images. The results should be comparable, given that I didn't change the Perceptron class signature or main functionality. Here is the testing code:

    from keras.datasets import mnist

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    # Flatten every image to a 784-element row and scale it by its OWN maximum
    # pixel value. (The test images were previously divided by the maximum of
    # the training image with the same index.)
    train_flat = train_images.reshape(train_images.shape[0], 784)
    train_images_resized = train_flat / train_flat.max(axis=1, keepdims=True)

    test_flat = test_images.reshape(test_images.shape[0], 784)
    test_images_resized = test_flat / test_flat.max(axis=1, keepdims=True)

    desired_digit = 1

    # One-vs-rest targets: 1 for the desired digit, 0 for every other digit.
    train_labels = [1 if label == desired_digit else 0 for label in train_labels]
    test_labels = [1 if label == desired_digit else 0 for label in test_labels]

    digits_to_recognize = [desired_digit]

    # NOTE(review): fit() loops over every sample in Python for n_iters
    # passes; on the full training set this will be very slow - consider a
    # subset of the data or fewer iterations.
    X, y = train_images_resized, train_labels
    p = Perceptron(learning_rate=0.05, n_iters=100000)
    p.fit(X, y)
    

    Note that I had to normalize the input data (divide each image by its max value) so that the sigmoid function doesn't get saturated, which would make the derivative = 0.


    Results!

    p.predict(test_images_resized)
    
    array([0.004823, 0.531128, 0.94834 , 0.000155, 0.002682, 0.981524,
           0.008962, 0.067788, 0.017121, 0.00063 ])
    
    test_labels
    
    [0, 0, 1, 0, 0, 1, 0, 0, 0, 0]