Tags: c, algorithm, machine-learning, neural-network, perceptron

Perceptron learning algorithm not converging to 0


Here is my perceptron implementation in ANSI C:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

float randomFloat()
{
    srand(time(NULL));
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    // X, Y coordinates of the training set.
    float x[208], y[208];

    // Training set outputs.
    int outputs[208];

    int i = 0; // iterator

    FILE *fp;

    if ((fp = fopen("test1.txt", "r")) == NULL)
    {
        printf("Cannot open file.\n");
    }
    else
    {
        while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF)
        {
            if (outputs[i] == 0)
            {
                outputs[i] = -1;
            }
            printf("%f   %f   %d\n", x[i], y[i], outputs[i]);
            i++;
        }
    }

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                for (i = 0; i < 2; i++)
                {
                    float add = learningRate * localError;
                    if (i == 0)
                    {
                        add *= x[p];
                    }
                    else if (i == 1)
                    {
                        add *= y[p];
                    }
                    weights[i] +=  add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f %.2f\n", iteration, globalError, localError);

            iteration++;
        }

        system("PAUSE");

    } while (globalError != 0);

    system("PAUSE");
    return 0;
}

The training set I'm using: Data Set

I have removed all irrelevant code. Basically, what it does now is read the test1.txt file and load its values into three arrays: x, y, and outputs.

Then there is the perceptron learning algorithm which, for some reason, is not converging (globalError should converge to 0), and therefore I get an infinite do-while loop.

When I use a smaller training set (like 5 points), it works pretty well. Any ideas where the problem could be?
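
For reference, this is what a single weight update in the loop above looks like when traced with concrete numbers (the point and starting weights below are hypothetical, just to illustrate the rule the code applies):

#include <stdio.h>

int main(void)
{
    /* Hypothetical misclassified point: desired output -1, actual output +1. */
    float weights[2] = { 0.5f, 0.5f };
    float x = 2.7f, y = 5.12f;
    float learningRate = 0.1f;
    float localError = -1 - 1;                     /* outputs[p] - output = -2  */

    weights[0] += learningRate * localError * x;   /* 0.5 + (-0.2)*2.70 = -0.040 */
    weights[1] += learningRate * localError * y;   /* 0.5 + (-0.2)*5.12 = -0.524 */

    printf("Updated weights: %f %f\n", weights[0], weights[1]);
    return 0;
}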

I wrote this algorithm to be very similar to this C# Perceptron algorithm:


EDIT:

Here is an example with a smaller training set:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

float randomFloat()
{
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X coordinates of the training set.
    float x[] = { -3.2, 1.1, 2.7, -1 };

    // Y coordinates of the training set.
    float y[] = { 1.5, 3.3, 5.12, 2.1 };

    // The training set outputs.
    int outputs[] = { 1, -1, -1, 1 };

    int i = 0; // iterator

    FILE *fp;

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                for (i = 0; i < 2; i++)
                {
                    float add = learningRate * localError;
                    if (i == 0)
                    {
                        add *= x[p];
                    }
                    else if (i == 1)
                    {
                        add *= y[p];
                    }
                    weights[i] +=  add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f\n", iteration, globalError);          
        }

        iteration++;

    } while (globalError != 0);

    // Display network generalisation.
    printf("X       Y     Output\n");
    float j, k;
    for (j = -1; j <= 1; j += .5)
    {
        for (k = -1; k <= 1; k += .5)
        {
            // Calculate output.
            int output = calculateOutput(weights, j, k);
            printf("%.2f  %.2f  %s\n", j, k, (output == 1) ? "Blue" : "Red");
        }
    }

    // Display modified weights.
    printf("Modified weights: %.2f %.2f\n", weights[0], weights[1]);

    system("PAUSE");
    return 0;
}

Solution

  • In your current code, the perceptron successfully learns the direction of the decision boundary BUT is unable to translate it.

        y                              y
        ^                              ^
        |  - + \\  +                   |  - \\ +   +
        | -    +\\ +   +               | -   \\  + +   +
        | - -    \\ +                  | - -  \\    +
        | -  -  + \\  +                | -  -  \\ +   +
        ---------------------> x       --------------------> x
            stuck like this            need to get like this
    

    (as someone pointed out, here is a more accurate version)

    The problem lies in the fact that your perceptron has no bias term, i.e. a third weight component connected to an input of value 1.

           w0   -----
        x ---->|     |
               |  f  |----> output (+1/-1)
        y ---->|     |
           w1   -----
                   ^ w2
        1(bias) ---|
    
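
    To see why this matters, consider a toy example (hypothetical points, not taken from your data set): (0, 1) labeled +1 and (0, 3) labeled -1. Without a bias the decision boundary w0*x + w1*y = 0 always passes through the origin, so these two points, which lie on the same ray from the origin, always receive the same label and one of them is always misclassified. A minimal sketch:

    #include <stdio.h>

    /* Same thresholding as calculateOutput, with an optional bias w2. */
    int classify(float w0, float w1, float w2, float x, float y)
    {
        return (w0 * x + w1 * y + w2 >= 0) ? 1 : -1;
    }

    int main(void)
    {
        float w1;

        /* No bias: the sums for (0,1) and (0,3) are w1 and 3*w1,
           which always have the same sign, so the labels always match. */
        for (w1 = -1.0f; w1 <= 1.0f; w1 += 0.5f)
            printf("w1 = %+.1f   (0,1) -> %+d   (0,3) -> %+d\n",
                   w1, classify(0, w1, 0, 0, 1), classify(0, w1, 0, 0, 3));

        /* With a bias, the line y = 2 (i.e. -1*y + 2 = 0) separates them. */
        printf("with bias:  (0,1) -> %+d   (0,3) -> %+d\n",
               classify(0, -1, 2, 0, 1), classify(0, -1, 2, 0, 3));
        return 0;
    }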

    The following is how I corrected the problem:

    #include <stdio.h>
    #include <stdlib.h>
    #include <math.h>
    #include <time.h>
    
    #define LEARNING_RATE    0.1
    #define MAX_ITERATION    100
    
    float randomFloat()
    {
        return (float)rand() / (float)RAND_MAX;
    }
    
    int calculateOutput(float weights[], float x, float y)
    {
        float sum = x * weights[0] + y * weights[1] + weights[2];
        return (sum >= 0) ? 1 : -1;
    }
    
    int main(int argc, char *argv[])
    {
        srand(time(NULL));
    
        float x[208], y[208], weights[3], localError, globalError;
        int outputs[208], patternCount, i, p, iteration, output;
    
        FILE *fp;
        if ((fp = fopen("test1.txt", "r")) == NULL) {
            printf("Cannot open file.\n");
            exit(1);
        }
    
        i = 0;
        while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF) {
            if (outputs[i] == 0) {
                outputs[i] = -1;
            }
            i++;
        }
        patternCount = i;
    
        weights[0] = randomFloat();
        weights[1] = randomFloat();
        weights[2] = randomFloat();
    
        iteration = 0;
        do {
            iteration++;
            globalError = 0;
            for (p = 0; p < patternCount; p++) {
                output = calculateOutput(weights, x[p], y[p]);
    
                localError = outputs[p] - output;
                weights[0] += LEARNING_RATE * localError * x[p];
                weights[1] += LEARNING_RATE * localError * y[p];
                weights[2] += LEARNING_RATE * localError;
    
                globalError += (localError*localError);
            }
    
            /* Root Mean Squared Error */
            printf("Iteration %d : RMSE = %.4f\n",
                iteration, sqrt(globalError/patternCount));
        } while (globalError > 0 && iteration <= MAX_ITERATION);
    
        printf("\nDecision boundary (line) equation: %.2f*x + %.2f*y + %.2f = 0\n",
            weights[0], weights[1], weights[2]);
    
        return 0;
    }
    

    ... with the following output:

    Iteration 1 : RMSE = 0.7206
    Iteration 2 : RMSE = 0.5189
    Iteration 3 : RMSE = 0.4804
    Iteration 4 : RMSE = 0.4804
    Iteration 5 : RMSE = 0.3101
    Iteration 6 : RMSE = 0.4160
    Iteration 7 : RMSE = 0.4599
    Iteration 8 : RMSE = 0.3922
    Iteration 9 : RMSE = 0.0000
    
    Decision boundary (line) equation: -2.37*x + -2.51*y + -7.55 = 0
    
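
    As a quick sanity check (hypothetical usage, not part of the original answer), the learned weights printed above can be plugged straight back into calculateOutput to classify new points:

    #include <stdio.h>

    /* Same decision function as in the corrected code above. */
    int calculateOutput(float weights[], float x, float y)
    {
        float sum = x * weights[0] + y * weights[1] + weights[2];
        return (sum >= 0) ? 1 : -1;
    }

    int main(void)
    {
        /* Weights taken from the run shown above. */
        float weights[3] = { -2.37f, -2.51f, -7.55f };

        /* -2.37*1.0 - 2.51*1.0 - 7.55 = -12.43 < 0  =>  -1 */
        printf("( 1.0,  1.0) -> %+d\n", calculateOutput(weights, 1.0f, 1.0f));

        /* -2.37*(-3.0) - 2.51*(-2.0) - 7.55 = 4.58 > 0  =>  +1 */
        printf("(-3.0, -2.0) -> %+d\n", calculateOutput(weights, -3.0f, -2.0f));
        return 0;
    }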

    And here's a short animation of the code above using MATLAB, showing the decision boundary at each iteration:

    [animation screenshot]