java, neural-network, xor, backpropagation, gradient-descent

Neural network: bad convergence


I've read a lot about neural networks over the last two weeks; I think I've seen pretty much every "XOR" tutorial on the net. But I wasn't able to make my own implementation work. I started with a simple "OR" neuron approach, which gave good results. I think my problem is in the backpropagation implementation. I took an object-oriented approach, so here are the main classes.

Three classes:

Neuron

public class Neuron {

/*
 * Attributes
 */

double[] inputs;
double[] weights;

double output;
double error;

double delta;
double[] deltaWeights; // last applied change of each weight, kept for the momentum term

/*
 * Constructors
 */

public Neuron(int nInputs) 
{
    inputs = new double[nInputs + 1];
    inputs[inputs.length - 1] = 1; // bias
    weights = new double[nInputs + 1];
    deltaWeights = new double[nInputs + 1];
}

/*
 * Methods
 */

/**
 * Reset all weights of the neuron to random values between -0.5 and 0.5
 */
public void reset()
{       
    Random random = new Random();
    for (int i = 0; i < weights.length; i++)
        weights[i] = random.nextDouble() - 0.5; // uniform in [-0.5, 0.5)
}

/**
 * Compute output for given inputs
 * @param inputs
 */
public void computeOutput(double inputs[])
{
    setInputs(inputs);
    output = Sigmoid.activation(getDotProduct());
}

/**
 * Compute error for given ideal
 * @param ideal
 */
public void computeError(double ideal)
{
    error = ideal - output;
    delta = error;
}

/**
 * Compute error for hidden neurons
 * @param nextLayer the layer n+1 (the one closer to the output)
 * @param position this neuron's index in its own layer
 */
public void computeError(FeedForwardLayer nextLayer, int position)
{
    double sum = 0;
    for (int i = 0; i < nextLayer.neurons.length; i++)
        sum += (nextLayer.neurons[i].delta * nextLayer.neurons[i].weights[position]);

    delta = Sigmoid.derivative(getDotProduct()) * sum;
    error = delta;
}

/**
 * Adjust every weight of the neuron
 */
public void adjustWeights(double lambda, double momentum)
{
    for (int i = 0; i < weights.length; i++) 
    {
        double lastDeltaWeight = deltaWeights[i]; // previous change of *this* weight
        deltaWeights[i] = lambda * (delta * inputs[i]) + momentum * lastDeltaWeight;
        weights[i] += deltaWeights[i];
    }
}

@Override
public String toString() 
{
    String str = "";
    for (int i = 0; i < weights.length; i++)
        str = str.concat(String.format("IN|W --> %.6f | %.6f \n", (float) inputs[i], (float) weights[i]));

    str = str.concat("Output = " + output + "\n");
    str = str.concat("Error = " + error + "\n");
    return str;
}

/*
 * Getters & Setters
 */

/**
 * @return the dot product of weights and inputs (the bias contributes via the constant last input)
 */
public double getDotProduct()
{
    double sum = 0;
    for (int i = 0; i < inputs.length; i++)
        sum += (weights[i] * inputs[i]);

    return sum;
}

/**
 * Set inputs (keep bias input)
 * @param inputs
 */
public void setInputs(double[] inputs)
{
    for (int i = 0; i < inputs.length; i++)
        this.inputs[i] = inputs[i];
}

/**
 * Set every weight to a single value
 * @param weight
 */
public void setWeights(double weight)
{
    for (int i = 0; i < weights.length; i++)
        this.weights[i] = weight;
}
}
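
The Sigmoid helper isn't shown in the question. A minimal version consistent with how it's called above (both methods take the raw dot product, not the activated output) would be:

public class Sigmoid {

    /** Logistic activation: 1 / (1 + e^-x) */
    public static double activation(double x)
    {
        return 1.0 / (1.0 + Math.exp(-x));
    }

    /** Derivative of the logistic function, computed from the raw input x */
    public static double derivative(double x)
    {
        double s = activation(x);
        return s * (1.0 - s);
    }
}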

FeedForwardLayer (which contains neurons)

public class FeedForwardLayer {

/*
 * Attributes
 */

Neuron[] neurons;
LayerTypes type;

/*
 * Constructors
 */

/**
 * @param nInputs number of inputs per neuron
 * @param nNeurons number of neurons in the layer
 * @param type the layer's type (input, hidden or output)
 */
public FeedForwardLayer(int nInputs, int nNeurons, LayerTypes type) 
{
    neurons = new Neuron[nNeurons];
    for (int i = 0; i < neurons.length; i++)
        neurons[i] = new Neuron(nInputs);

    this.type = type;
}

/*
 * Methods
 */

/**
 * Reset all weights of the layer's neurons to random values between -0.5 and 0.5
 */
public void reset()
{
    for (Neuron neuron : neurons) 
        neuron.reset();
}

/**
 * Compute the output of every neuron in the layer for the given inputs
 * @param inputs
 */
public void computeOutputs(double[] inputs)
{
    for (int i = 0; i < neurons.length; i++) 
        neurons[i].computeOutput(inputs);
}

/**
 * Compute errors, when this is the output layer
 * @param ideals
 */
public void computeErrors(double[] ideals)
{
    for (int i = 0; i < neurons.length; i++)
        neurons[i].computeError(ideals[i]);
}

/**
 * Compute errors, when this is a hidden layer
 * @param next the layer n+1 (the next layer toward the output)
 */
public void computeErrors(FeedForwardLayer next)
{
    for (int i = 0; i < neurons.length; i++)
        neurons[i].computeError(next, i);
}

/**
 * Adjust the weights of every neuron
 */
public void adjustWeights(double lambda, double momentum)
{
    for (Neuron neuron : neurons) 
        neuron.adjustWeights(lambda, momentum);
}

@Override
public String toString()
{
    String str = "";
    for (int i = 0; i < neurons.length; i++)
        str = str.concat("Neuron " + i + "\n" + neurons[i]);
    return str;
}

/*
 * Getters - Setters
 */

/**
 * @return true if layer is input, false otherwise
 */
public boolean isInput()
{
    return type == LayerTypes.INPUT;
}

/**
 * @return true if layer is output, false otherwise
 */
public boolean isOutput()
{
    return type == LayerTypes.OUTPUT;
}

/**
 * @return an array of layer's outputs
 */
public double[] getOutputs()
{
    double[] outputs = new double[neurons.length];

    for (int i = 0; i < neurons.length; i++) 
        outputs[i] = neurons[i].output;

    return outputs;
}

/**
 * @return array of layer's errors
 */
public double[] getErrors()
{
    double[] errors = new double[neurons.length];

    for (int i = 0; i < neurons.length; i++)
        errors[i] = neurons[i].error;

    return errors;
}

/**
 * Set all the weights of the layer to given weight
 * @param weight
 */
public void setWeights(double weight)
{
    for (int i = 0; i < neurons.length; i++)
        neurons[i].setWeights(weight);
}
}
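
The LayerTypes enum isn't shown either; a minimal declaration that fits the code would be the following (the declaration order matters for FeedForwardNetwork.toString(), which indexes LayerTypes.values() by layer position):

public enum LayerTypes {
    INPUT, HIDDEN, OUTPUT
}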

FeedForwardNetwork (which contains FeedForwardLayers)

public class FeedForwardNetwork {

static final double lambda = 0.1;
static final double momentum = 0;

/*
 * Attributes
 */

private ArrayList<FeedForwardLayer> layers;

/*
 * Constructors
 */

public FeedForwardNetwork() 
{
    layers = new ArrayList<FeedForwardLayer>();
}

/*
 * Methods
 */

/**
 * Reset all the weights to random values
 */
public void reset()
{       
    for (int i = 0; i < layers.size(); i++)
        layers.get(i).reset();
}

/**
 * Compute output for all the neurons of all the layers for given inputs
 * @param inputs
 */
public void feedForward(double[] inputs)
{
    //System.err.println("FeedForwardNetwork.feedForward(" + inputs[0] + ", " + inputs[1] +")");
    for (int i = 0; i < layers.size(); i++) 
    {
        //System.err.println("\n*** COMPUTING OUTPUT FOR LAYER " + i + "***\n");
        if (layers.get(i).isInput())
            layers.get(i).computeOutputs(inputs);
        else
            layers.get(i).computeOutputs(layers.get(i - 1).getOutputs());
    }
}

/**
 * Compute errors for all the neurons of all the layers, starting from the output layer
 * @param ideals
 */
public void feedBackward(double[] ideals)
{
    //System.err.println("FeedForwardNetwork.feedBackward(" + ideals[0] + ")");
    // For each layer, starting from the output one
    for (int i = layers.size() - 1; i > 0; i--) 
    {
        //System.err.println("*** COMPUTING ERROR FOR LAYER " + i + "***");
        if (layers.get(i).isOutput())
            layers.get(i).computeErrors(ideals);
        else
            layers.get(i).computeErrors(layers.get(i + 1));
    }
}

/**
 * Adjust weights of every layer
 */
public void adjustWeights()
{
    for (FeedForwardLayer feedForwardLayer : layers) 
        feedForwardLayer.adjustWeights(lambda, momentum);
}

/**
 * Train the network on one sample: feed forward, backpropagate the error, then adjust the weights
 * @param inputs
 * @param outputs
 */
public void train(double[] inputs, double... outputs)
{
    feedForward(inputs);
    feedBackward(outputs);
    adjustWeights();
}

/**
 * Add a layer to the network
 * @param layer
 */
public void addLayer(FeedForwardLayer layer)
{
    layers.add(layer);
}

@Override
public String toString() 
{
    String str = "";
    for (int i = 0; i < layers.size(); i++)
        str = str.concat("Layer " + LayerTypes.values()[i] + "\n" + layers.get(i));

    str = str.concat("\n");
    str = str.concat("OUTPUT = " + getOutputs()[0] + "\n");
    str = str.concat("ERROR = "  + getError(false) + "\n");
    return str;
}
/*
 * Getters & Setters
 */

public FeedForwardLayer getInputLayer()
{
    return layers.get(0);
}

public FeedForwardLayer getOutputLayer()
{
    return layers.get(layers.size() - 1);
}

public FeedForwardLayer getLayer(int index)
{
    return layers.get(index);
}

public double getError(boolean abs)
{
    if (abs)
        return Math.abs(getOutputLayer().neurons[0].error);

    return getOutputLayer().neurons[0].error;
}

public double[] getOutputs()
{
    return getOutputLayer().getOutputs();
}
}

So I train the network by feeding it epochs of the XOR table:

X | Y | S
0   0   0
0   1   1
1   0   1
1   1   0
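
In code, the training harness looks roughly like this (a sketch: the 2-2-1 topology, the epoch count, and the call to network.reset() to randomize the weights are assumptions, since the question doesn't show the actual loop):

FeedForwardNetwork network = new FeedForwardNetwork();
network.addLayer(new FeedForwardLayer(2, 2, LayerTypes.INPUT));
network.addLayer(new FeedForwardLayer(2, 2, LayerTypes.HIDDEN));
network.addLayer(new FeedForwardLayer(2, 1, LayerTypes.OUTPUT));
network.reset(); // randomize all weights

double[][] inputs = { {0, 0}, {0, 1}, {1, 0}, {1, 1} };
double[]   ideals = { 0, 1, 1, 0 };

for (int epoch = 0; epoch < 10000; epoch++)
    for (int i = 0; i < inputs.length; i++)
        network.train(inputs[i], ideals[i]);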

After thousands of epochs, the network outputs approximately 0.5... An interesting fact: if I replace the training set with an AND, OR, or NAND table, the network outputs the proportion of 1s in the S column of the training set (0.25 for the AND table, 0.75 for the OR and NAND tables).

I just want to know if my implementation is good enough to make it work. Thanks!


Solution

  • So, after some research, I realized that my implementation was good, except that I didn't understand how the input layer works. That was it: the input layer works like In = Out, i.e. input neurons just pass their inputs through unchanged instead of applying weights and the sigmoid.
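
One way to express that fix (a sketch, not necessarily the exact change the author made) is to have FeedForwardLayer.computeOutputs() bypass the weighted sum and sigmoid for the input layer:

/**
 * Compute the output of every neuron in the layer.
 * The input layer is the identity: each neuron forwards its input unchanged.
 */
public void computeOutputs(double[] inputs)
{
    for (int i = 0; i < neurons.length; i++)
    {
        if (type == LayerTypes.INPUT)
            neurons[i].output = inputs[i]; // In = Out, one input per input neuron
        else
            neurons[i].computeOutput(inputs);
    }
}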