I'm trying to implement an easy backpropagation algorithm for an exam (I'm a beginner programmer).
I've got a set of arrays and I generate random weights to start the algorithm.
I implemented the activation function following the math formula:
(where x index are for inputs and y index is for hidden neuron input)
My problem is that I get some summation results with very high exponential values that are incompatible with what I'd expect to be.
Here's my code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define INPUT_NEURONS 4
#define HIDDEN_NEURONS 7
#define OUTPUT_NEURONS 3
#define MAX_SAMPLES 150
#define LEARNING_RATE 0.1
#define RAND_WEIGHT ((double)rand()/(RAND_MAX+1))
double IHweight[INPUT_NEURONS][HIDDEN_NEURONS]; /* in->hid weight */
double HOweight[HIDDEN_NEURONS][OUTPUT_NEURONS]; /* hid->out weight */
//activation
double inputs[MAX_SAMPLES][INPUT_NEURONS];
double hidden[HIDDEN_NEURONS];
double target[MAX_SAMPLES][OUTPUT_NEURONS];
double actual[OUTPUT_NEURONS];
//errors
double errO[OUTPUT_NEURONS];
double errH[HIDDEN_NEURONS];
double Error = 0.0;
int sample = 0;
typedef struct {
double sepal_lenght;
double sepal_width;
double petal_lenght;
double petal_width;
double output[OUTPUT_NEURONS];
} IRIS;
IRIS samples[MAX_SAMPLES] = {
{ 5.1, 3.5, 1.4, 0.2, 0.0, 0.0, 1.0 },
{ 4.9, 3.0, 1.4, 0.2, 0.0, 0.0, 1.0 },
{ 4.7, 3.2, 1.3, 0.2, 0.0, 0.0, 1.0 },
{...},
};
double sigmoid(double val) {
return (1.0 / (1.0 + exp(-val)));
}
double dsigmoid(double val) {
return (val * (1.0 - val));
}
void assignRandomWeights() {
int hid, inp, out;
printf("Initializing weights...\n\n");
for (inp = 0; inp < INPUT_NEURONS; inp++) {
for (hid = 0; hid < HIDDEN_NEURONS; hid++) {
IHweight[inp][hid] = RAND_WEIGHT;
printf("Weights : input %d -> hidden %d: %f\n",
inp, hid, IHweight[inp][hid]);
}
}
for (hid = 0; hid < HIDDEN_NEURONS; hid++) {
for (out = 0; out < OUTPUT_NEURONS; out++) {
HOweight[hid][out] = RAND_WEIGHT;
printf("hidden %d -> output %d: %f\n",
hid, out, HOweight[hid][out]);
}
}
system("pause");
}
void activation() {
int hid, inp, out;
double sumH[HIDDEN_NEURONS] ;
double sumO[OUTPUT_NEURONS];
for (hid = 0; hid < HIDDEN_NEURONS; hid++) {
for (inp = 0; inp < INPUT_NEURONS; inp++) {
sumH[hid] += (inputs[sample][inp] * IHweight[inp][hid]);
printf("\n%d Input %d = %.1f Weight = %f sumH = %g",
sample, inp, inputs[sample][inp], IHweight[inp][hid], sumH[hid]);
}
hidden[hid] = sigmoid(sumH[hid]);
printf("\nHidden neuron %d activation = %f", hid, hidden[hid]);
}
for (out = 0; out < OUTPUT_NEURONS; out++) {
for (hid = 0; hid < HIDDEN_NEURONS; hid++) {
sumO[out] += (hidden[hid] * HOweight[hid][out]);
printf("\n%d Hidden %d = %f Weight = %f sumO = %g",
sample, hid, hidden[hid], HOweight[hid][out], sumO[out]);
}
actual[out] = sigmoid(sumO[out]);
printf("\nOutput neuron %d activation = %f", out, actual[out]);
}
}
main () {
srand(time(NULL));
assignRandomWeights();
for (int epoch = 0; epoch < 1; epoch++) {
for (int i = 0; i < 1; i++) {
sample = rand() % MAX_SAMPLES;
inputs[sample][0] = samples[sample].sepal_lenght;
inputs[sample][1] = samples[sample].sepal_width;
inputs[sample][2] = samples[sample].petal_lenght;
inputs[sample][3] = samples[sample].petal_width;
target[sample][0] = samples[sample].output[0];
target[sample][1] = samples[sample].output[1];
target[sample][2] = samples[sample].output[2];
activation();
}
}
}
I'm using a lot of printf()
to check my results and i get
...
41 Input 0 = 4.5 Weight = 0.321014 sumH = 1.31886e+267
41 Input 1 = 2.3 Weight = 0.772369 sumH = 1.31886e+267
41 Input 2 = 1.3 Weight = 0.526123 sumH = 1.31886e+267
41 Input 3 = 0.3 Weight = 0.271881 sumH = 1.31886e+267
Hidden neuron 6 activation = 1.000000
...
41 Hidden 0 = 0.974952 Weight = 0.343445 sumO = 1.24176e+267
41 Hidden 1 = 0.917789 Weight = 0.288361 sumO = 1.24176e+267
41 Hidden 2 = 0.999188 Weight = 0.972168 sumO = 1.24176e+267
41 Hidden 3 = 0.989726 Weight = 0.082642 sumO = 1.24176e+267
41 Hidden 4 = 0.979063 Weight = 0.531799 sumO = 1.24176e+267
41 Hidden 5 = 0.972474 Weight = 0.552521 sumO = 1.24176e+267
41 Hidden 6 = 1.000000 Weight = 0.707153 sumO = 1.24176e+267
Output neuron 1 activation = 1.000000
The assignRandomweights()
and sigmoid()
functions are ok as far as i can tell, the problem is in activation()
.
Please help me understand why this happens and how to solve it.
Your problem is in these lines
double sumH[HIDDEN_NEURONS];
double sumO[OUTPUT_NEURONS];
You don't initialise them before use. Technically the program behaviour is undefined. Your friendly compiler appears to be setting uninitialised variables to large values. (Other platforms such as Itanium will trap a "Not a Thing").
A simple remedy is to use double sumH[HIDDEN_NEURONS] = {0};
etc. which will set every element to zero.