I'm trying to understand back-propagation, so I wrote some Python code, but it's not working properly. When I train it on the XOR input-output pairs the error does not converge, but if I change the value of the last XOR output it does converge.
Also, if I set some target output values > 1, the error converges toward target - 1, which does not seem right.
import numpy as np
import random

class neural_network():
    activation = []  # List with the activation values of each layer
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
        sizeOfLayers: Tuple with the number of neurons in each layer
        (in, hidden, out)
        '''
        if len(sizeOfLayers) > 3:
            raise ValueError('Wrong number of layers')
        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            if i == 0:
                # input layer + bias
                self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
            else:
                self.activation.append(sizeOfLayers[i]*[0.0])
        # Wi = len(Hid) x len(IN)+1(bias)
        self.weightsIn = np.random.random((sizeOfLayers[1], sizeOfLayers[0] + 1))
        # Wo = len(OUT) x len(Hid)
        self.weightsOut = np.random.random((sizeOfLayers[2], sizeOfLayers[1]))

    def forward(self, X):
        '''
        X: input vector
        '''
        # input + bias appended to the activation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        # sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        # activation of hidden layer
        self.activation[1] = (self.sigmoid(self.sumHidden))
        # sum of (out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        # activation of output
        self.activation[2] = (self.sigmoid(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, Y, trainRate = 0.1):
        '''
        Y: target output
        trainRate: learning rate
        '''
        if len(Y) != self.sizeOfLayers[2]:
            raise ValueError('Wrong number of inputs')
        # output delta
        error_o = Y.T - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        # hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update output weights
        change_o = self.activation[1] * out_delta.T
        for i in range(self.sizeOfLayers[2]):
            for j in range(self.sizeOfLayers[1]):
                self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i]
        # update input weights
        change_h = self.activation[0] * hiden_delta.T
        for i in range(self.sizeOfLayers[1]):
            for j in range(self.sizeOfLayers[0]):
                self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i]
        # error
        return np.sum((Y.T - self.activation[2].T)**2)/0.5

    def sigmoid(self, z, derv = False):
        if derv == False:
            return 1/(1+np.exp(-z))

    def sigmoidPrime(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def train(self, target, trainRate = 0.001, it = 50000):
        for i in range(it):
            error = 0.0
            for t in target:
                inputs = np.array(t[0])
                targets = np.array([t[1]])
                self.forward(inputs)
                error = error + self.backPropagate(targets, trainRate)

nn = neural_network((2,6,1))
xor = [
    [[0,0], [0]],
    [[0,1], [1]],
    [[1,0], [1]],
    [[1,1], [0]]  # If I change this to 1 it converges
]
nn.train(xor)
Edit: I made the modifications Diego Stéfano suggested (thank you, Diego), but the error still does not converge.
import numpy as np
import math
import random
from scipy.special import expit
from sklearn.preprocessing import normalize

class neural_network(object):
    activation = []
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
        sizeOfLayers: Tuple with the number of neurons in each layer
        (in, hidden, out)
        '''
        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
        self.weightsIn = np.random.normal(scale=0.1, size=(sizeOfLayers[1], sizeOfLayers[0] + 1))
        self.weightsOut = np.random.normal(scale=0.1, size=(sizeOfLayers[2], sizeOfLayers[1] + 1))

    def forward(self, X):
        '''
        X: input vector
        '''
        # input + bias appended to the activation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        # sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        # hidden activation + bias appended to the activation vector
        self.activation[1] = np.vstack((expit(self.sumHidden), np.array([1])))
        # sum of (out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        # activation of output
        self.activation[2] = (expit(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, X, Y, trainRate = 0.1):
        self.forward(X)
        # output delta
        error_o = Y - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        # hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update output weights
        change_o = self.activation[1] * np.transpose(out_delta)
        self.weightsOut = self.weightsOut + trainRate*change_o.T
        # update hidden weights
        change_h = self.activation[0].dot(hiden_delta[:-1].T)
        self.weightsIn = self.weightsIn + trainRate*change_h.T
        # error
        return np.sum((Y - self.activation[2].T)**2)*0.5

    def train(self, input_list, epochs):
        for epoch in range(epochs):
            ErrAcc = 0.0
            for inputs, targets in input_list:
                Err = self.backPropagate(np.array(inputs), np.array(targets), 0.2)
                ErrAcc = ErrAcc + Err
            if epoch % 1000 == 0:
                print('Epoch = %d, ErrAcc = %f' % (epoch, ErrAcc))

    def sigmoidPrime(self, x):
        return expit(x)*(1-expit(x))

nn = neural_network((2,10,1))
xor = [
    [[0,0], [0]],
    [[0,1], [1]],
    [[1,0], [1]],
    [[1,1], [0]]  # If I change this to 1 it converges
]
nn.train(xor, 300000)
Here are the modifications I made to your code that got it working:
Add biases to the output neurons too. Every neuron in the network should have one, since the bias detaches the activation threshold from the origin and consequently shifts your activation function left or right, greatly improving the chances of successful learning.
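For intuition, here is a minimal standalone sketch (my own illustration, not part of your network) of how a bias term shifts a single sigmoid neuron:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x = 0.0
w = 1.0
print(sigmoid(w*x))        # 0.5: without a bias the curve is centered at the origin
print(sigmoid(w*x + 2.0))  # ~0.88: a bias of 2 shifts the curve, moving the decision threshold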
Instead of using np.random.random, which generates numbers in the interval [0.0, 1.0), initialize the weights with np.random.uniform so they are drawn uniformly from [-1.0, 1.0).
Center your input space around the origin (i.e., subtract the mean) and normalize it, as in the sketch below.
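As a minimal sketch of that preprocessing step (my own illustration, applied to the XOR inputs), centering and scaling turns the 0/1 inputs into the -1/1 inputs used in the working code further down:

import numpy as np

X = np.array([[0.0, 0.0],
              [0.0, 1.0],
              [1.0, 0.0],
              [1.0, 1.0]])
X_centered = X - X.mean(axis=0)                         # remove the mean: values become -0.5 / 0.5
X_scaled = X_centered / np.abs(X_centered).max(axis=0)  # normalize: values become -1.0 / 1.0
print(X_scaled)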
Here's how your initialization should look:
for i in range(len(sizeOfLayers)):
    self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])

self.weightsIn = np.random.uniform(-1, 1, (sizeOfLayers[1], sizeOfLayers[0] + 1))
self.weightsOut = np.random.uniform(-1, 1, (sizeOfLayers[2], sizeOfLayers[1] + 1))
And then you will also have to append a 1 to activation in the forward function:
self.activation[1] = np.vstack((self.sigmoid(self.sumHidden), np.array([1])))
You may need to adjust the learning rate to make it work (about 0.5 worked for me). Also, your mean-squared error calculation is wrong: you should multiply by 0.5, not divide by it.
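For reference, a tiny standalone sketch of that error term (target and output are just made-up values here):

import numpy as np

target = np.array([1.0])
output = np.array([0.25])
error = 0.5 * np.sum((target - output)**2)  # multiply by 0.5; dividing by 0.5 doubles the error instead of halving it
print(error)  # 0.28125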
Here is your modified code:
import numpy as np
import random

class neural_network():
    activation = []  # List with the activation values of each layer
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
        sizeOfLayers: Tuple with the number of neurons in each layer
        (in, hidden, out)
        '''
        if len(sizeOfLayers) > 3:
            raise ValueError('Wrong number of layers')
        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            # each layer + bias
            self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
        # Wi = len(Hid) x len(IN)+1(bias)
        self.weightsIn = np.random.uniform(-1, 1, (sizeOfLayers[1], sizeOfLayers[0] + 1))
        # Wo = len(OUT) x len(Hid)+1(bias)
        self.weightsOut = np.random.uniform(-1, 1, (sizeOfLayers[2], sizeOfLayers[1] + 1))

    def forward(self, X):
        '''
        X: input vector
        '''
        # input + bias appended to the activation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        # sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        # activation of hidden layer + bias
        self.activation[1] = np.vstack((self.sigmoid(self.sumHidden), np.array([1])))
        # sum of (out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        # activation of output
        self.activation[2] = (self.sigmoid(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, Y, trainRate = 0.1):
        '''
        Y: target output
        trainRate: learning rate
        '''
        if len(Y) != self.sizeOfLayers[2]:
            raise ValueError('Wrong number of inputs')
        # output delta
        error_o = Y.T - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        # hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update output weights
        change_o = self.activation[1] * out_delta.T
        for i in range(self.sizeOfLayers[2]):
            for j in range(self.sizeOfLayers[1]):
                self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i]
        # update input weights
        change_h = self.activation[0] * hiden_delta.T
        for i in range(self.sizeOfLayers[1]):
            for j in range(self.sizeOfLayers[0]):
                self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i]
        # error
        return np.sum((Y.T - self.activation[2].T)**2)*0.5

    def sigmoid(self, z, derv = False):
        if derv == False:
            return 1/(1+np.exp(-z))

    def sigmoidPrime(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def train(self, target, trainRate = 0.5, it = 50000):
        for i in range(it):
            error = 0.0
            for t in target:
                inputs = np.array(t[0])
                targets = np.array([t[1]])
                self.forward(inputs)
                error = error + self.backPropagate(targets, trainRate)

nn = neural_network((2,5,1))
xor = [
    [[-1.0, -1.0], [0]],
    [[-1.0,  1.0], [1]],
    [[ 1.0, -1.0], [1]],
    [[ 1.0,  1.0], [0]]
]
nn.train(xor)

for e in xor:
    nn.forward(e[0])
    print(nn.activation[2])
Good luck!