I'm following this article to understand the logic, but I've implemented it differently using structs.
The problem is that the network never converges to the desired outputs, which means the weights are not being updated correctly. If I train the network a sufficiently large number of times, the output stops changing entirely, i.e. the weights no longer get updated, so the network behaves as though it has found the correct weights, but the output shows otherwise.
Each Neuron holds an array of Paths. Weight is a property of a Path, and a Neuron can send values down a Path to the Neuron on the other side.
Below is the code...
#include <iostream>
#include <cmath>
#include <cstdlib>
using namespace std;

const double e = 2.7182818284;
Neuron:
struct Path;    //forward declaration; Path is defined below

struct Neuron{
    double value;       //local input
    double bias;
    double gradient;
    double out;         //output value
    Path *p;            //path array
    int nc;             //number of paths/connections belonging to *this
};
Path:
struct Path{
    double weight;
    double prevDelta;
    double delta;
    int nid;    //index of the target Neuron in the neighboring Layer's n array
};
Layers contain Neurons. A Path identifies a Neuron by an integer nid, which is its index in the Neuron array of the neighboring Layer.
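So sending a value down path i boils down to one line, which is exactly what Transfer() below does:

    neighbor->n[p[i].nid].value += p[i].weight * out;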
Layer:
struct Layer{
    int nneurons;    //number of neurons in this cluster/layer
    Neuron *n;       //local Neuron array
    Layer *neighbor;

    //compute the target Neurons' inputs from Neuron nid's output and assign them
    void Transfer(int nid)
    {
        double valueOut = 0;
        Neuron *temp = &n[nid];
        //for each connection, send w*v to the paired neuron
        for(int i = 0; i < temp->nc; i++)
        {
            valueOut = temp->p[i].weight * temp->out;
            //the neuron indexed by p[i].nid, which is in the other layer, receives the value
            neighbor->n[temp->p[i].nid].value += valueOut;
        }
    }

    void Initialize(int size)
    {
        nneurons = size;
        n = new Neuron[nneurons];
        for(int i = 0; i < nneurons; i++)
        {
            n[i].value = 0.0;
            n[i].bias = 1.0;
            n[i].out = 0.0;
        }
    }

    void FormConnections(Layer& nl)    //with the neighboring layer
    {
        neighbor = &nl;
        int nCon = neighbor->nneurons;
        for(int i = 0; i < nneurons; i++)
        {
            n[i].nc = nCon;
            n[i].p = new Path[nCon];
            //neuron 'i' links its paths to the neurons in the other layer
            for(int ii = 0; ii < n[i].nc; ii++)
            {
                n[i].p[ii].weight = 1.0;
                n[i].p[ii].prevDelta = 0.0;
                n[i].p[ii].nid = ii;
            }
        }
    }
};
Brain (the neural network):
class Brain{
public:
    double eta;
    double alpha;
    Layer input, hidden, output;
    double *target;

    void GetInput(double* in)
    {
        for(int i = 0; i < input.nneurons; i++)
            input.n[i].value = in[i];
    }

    void GetDesiredOutput(double* t)
    {
        target = t;
    }

    void Initialize(int inputsize, int hiddensize, int outputsize)
    {
        input.Initialize(inputsize);
        hidden.Initialize(hiddensize);
        output.Initialize(outputsize);
        input.FormConnections(hidden);
        hidden.FormConnections(output);
    }
    void BP()
    {
        //calculate gradients for the output layer (tanh derivative)
        for(int i = 0; i < output.nneurons; i++)
            output.n[i].gradient = (target[i] - output.n[i].out) * (1 - output.n[i].out) * (1 + output.n[i].out);

        Neuron* temp;
        for(int i = 0; i < hidden.nneurons; i++)
        {
            temp = &hidden.n[i];
            temp->gradient = 0;
            //for each connection...
            for(int ii = 0; ii < hidden.n[i].nc; ii++)
            {
                //temp's gradient accumulates values of the form w1*g1 + w2*g2 + ... + wn*gn,
                //where w is the weight of the path leading from temp (hidden) to an output
                //neuron, and g is the gradient of the output neuron at p[ii].nid
                temp->gradient += temp->p[ii].weight * output.n[temp->p[ii].nid].gradient;
            }
            //multiply the resulting sum by d/dx S(x)
            temp->gradient *= (temp->out) * (1 - temp->out);
        }
        //---------------------------------------------------------------------------
        //calculate deltas
        for(int i = 0; i < input.nneurons; i++)
        {
            temp = &input.n[i];
            //temp->bias=eta*temp->gradient;
            for(int ii = 0; ii < input.n[i].nc; ii++)
            {
                temp->p[ii].delta = eta * hidden.n[temp->p[ii].nid].gradient * temp->value;
                temp->p[ii].weight = temp->p[ii].prevDelta * alpha + temp->p[ii].delta;
                temp->p[ii].prevDelta = temp->p[ii].delta;
            }
        }
        for(int i = 0; i < hidden.nneurons; i++)
        {
            temp = &hidden.n[i];
            temp->bias = eta * temp->gradient;
            for(int ii = 0; ii < hidden.n[i].nc; ii++)
            {
                temp->p[ii].delta = eta * output.n[temp->p[ii].nid].gradient * temp->value;
                temp->p[ii].weight = temp->p[ii].prevDelta * alpha + temp->p[ii].delta;
                temp->p[ii].prevDelta = temp->p[ii].delta;
            }
        }
        for(int i = 0; i < output.nneurons; i++)
        {
            temp = &output.n[i];
            temp->bias = eta * temp->gradient;
        }
        Zero(hidden);
        Zero(output);
    }
    void Process()
    {
        for(int i = 0; i < input.nneurons; i++)
        {
            input.n[i].out = input.n[i].value;
            input.Transfer(i);    //transfer each input neuron's data to the hidden layer
        }
        for(int i = 0; i < hidden.nneurons; i++)
        {
            hidden.n[i].out = Sigmoid(hidden.n[i].value + hidden.n[i].bias);
            hidden.Transfer(i);
        }
        for(int i = 0; i < output.nneurons; i++)
        {
            output.n[i].out = HyperTan(output.n[i].value + output.n[i].bias);
            cout << "Output " << i << ": " << output.n[i].out << endl;
        }
    }
    void Zero(Layer &l)
    {
        for(int i = 0; i < l.nneurons; i++) l.n[i].value = 0.0;
    }

    void Randomize(Layer &l)
    {
        for(int i = 0; i < l.nneurons; i++)
        {
            for(int ii = 0; ii < l.n[i].nc; ii++)
            {
                //integer division: yields a whole number in [0, 9]
                l.n[i].p[ii].weight = rand() % 100 / 10;
            }
        }
    }

    Brain(){ eta = 0.9; alpha = 0.4; }

    double Sigmoid(double x)
    {
        if (x < -45.0) return 0.0;
        else if (x > 45.0) return 1.0;
        else return 1.0 / (1.0 + pow(e, -x));
    }

    double HyperTan(double x)
    {
        if (x < -10.0) return -1.0;
        else if (x > 10.0) return 1.0;
        else return tanh(x);
    }
};
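For reference, the derivative identities behind the two gradient formulas in BP(): the output layer uses tanh, whose derivative is 1 - tanh²(x), i.e. (1 - out)(1 + out) in terms of the neuron's output; the hidden layer uses the sigmoid S, whose derivative is S(x)(1 - S(x)), i.e. out * (1 - out).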
A typical program would be:
Brain b;
double data[] = {1.0, 2.0, 3.0};
double ddata[] = {-0.25, 0.14};
b.Initialize(3,4,2);
b.GetDesiredOutput(ddata);
b.GetInput(data);
b.Process();
b.BP();
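Training "a sufficiently large number of times" means repeating the Process()/BP() pair; a minimal sketch of such a loop (the 10000 iteration count is an arbitrary choice for illustration):

Brain b;
double data[] = {1.0, 2.0, 3.0};
double ddata[] = {-0.25, 0.14};
b.Initialize(3, 4, 2);
b.GetDesiredOutput(ddata);
b.GetInput(data);    //input values persist; BP() only zeroes hidden and output
for(int i = 0; i < 10000; i++)
{
    b.Process();     //forward pass (prints the current outputs)
    b.BP();          //backward pass: gradients, deltas, weight update
}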
Example: with eta = 0.9 and alpha = 0.4, for input 1.0, 2.0, 3.0 I get:
-0.117471 and 0.0661122
The expected output is:
-0.25, 0.14
UPDATE (25/12/2013): The problem was in calculating the delta values for the weights, and there was an additional error when updating the weights using these new deltas, both of which take place in the same for-loop: I simply assigned the new weights, while I was supposed to add the deltas to the previous weights. (Note the deltas are also computed from temp->out now, rather than temp->value.) Correct delta calculation and weight assignment:
for(int i = 0; i < input.nneurons; i++)
{
    temp = &input.n[i];
    for(int ii = 0; ii < input.n[i].nc; ii++)
    {
        temp->p[ii].delta = eta * hidden.n[temp->p[ii].nid].gradient * temp->out;
        temp->p[ii].weight += temp->p[ii].prevDelta * alpha + temp->p[ii].delta;
        temp->p[ii].prevDelta = temp->p[ii].delta;
    }
}
And for hidden to output:
for(int i = 0; i < hidden.nneurons; i++)
{
    temp = &hidden.n[i];
    temp->bd = eta * temp->gradient;    //bd/pbd: bias delta and previous bias delta (new Neuron fields)
    temp->bias += temp->bd + alpha * temp->pbd;
    temp->pbd = temp->bd;
    for(int ii = 0; ii < hidden.n[i].nc; ii++)
    {
        temp->p[ii].delta = eta * output.n[temp->p[ii].nid].gradient * temp->out;
        temp->p[ii].weight += temp->p[ii].prevDelta * alpha + temp->p[ii].delta;
        temp->p[ii].prevDelta = temp->p[ii].delta;
    }
}
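In other words, each weight now follows the momentum update rule delta(t) = eta * gradient * out, then weight += delta(t) + alpha * delta(t-1), where delta(t-1) is the prevDelta stored on the Path. Assigning instead of accumulating (weight = ...) discards the weight's current value on every pass, which is consistent with the outputs freezing instead of converging.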