Search code examples
python · neural-network · numpy-ndarray

Shapes (4,1) and (2,1) not aligned: 1 (dim 1) != 2 (dim 0)


Hello, I am new to coding and am trying to work on neural networks. I have been trying to get values for a project where we check the XOR gate using basic code, but I am getting stuck here. Could someone please help?

# Weights for a single unit computing 20*x1 + 20*x2 (paired with bias -10 below => OR).
W = np.array([[20],
              [20]])

class Layer():
  """A dense layer holding a weight matrix W of shape (m, n) and a bias b."""

  def __init__(self, W, b):
    self.m = W.shape[0]  # rows of W: number of inputs the layer expects
    self.n = W.shape[1]  # columns of W: number of units in the layer
    self.W = W
    self.b = b

  def activate(self, X):
    # X must have shape (batch, m) so np.dot(X, W) aligns to (batch, n).
    z = np.dot(X, self.W) + self.b
    return sigmoid(z)  # NOTE: sigmoid must be defined elsewhere in the file

OR_layer = Layer(W, -10)  # sigmoid(20*x1 + 20*x2 - 10): rounds to OR(x1, x2)

Output: array([[0.], [1.], [1.], [1.]])

This works fine to get the OR layer, but when trying to compute the XOR layer I get the following error:

W1 = np.array([[-20],[-20]])  # shape (2, 1): ONE hidden unit (-20*x1 - 20*x2 + 30 = NAND)
b1 = np.array(30)
W2 = np.array([[20],[20]])  # shape (2, 1): expects TWO hidden activations, but the
b2 = np.array(-30)          # hidden layer above emits only one -> shapes misalign later
#print(np.dot(W1,b1))
hidden_layer = Layer(W1, b1)
output_layer = Layer(W2, b2)

#Based on the previous code, the weights and biases have been updated in the code



class Network():
  """A two-layer feed-forward network built from two Layer objects."""

  def __init__(self, hidden, output):
    self.hidden = hidden
    self.output = output

  def activate(self, X):
    # Forward pass: hidden activations first, then the output layer.
    z = self.hidden.activate(X)
    return self.output.activate(z)

xor_gate = Network(hidden_layer, output_layer)

# NOTE(review): logic_inputs is never defined in this snippet -- presumably the
# (4, 2) truth table np.array([[0, 0], [0, 1], [1, 0], [1, 1]]); confirm upstream.
xor_output = xor_gate.activate(logic_inputs)#throwing error
np.round(xor_output)


This code is broken; I tried to mend it but had a hard time doing so.

Output should come to: array([[0.], [1.], [1.], [0.]])

This code is broken; I tried to mend it but had a hard time doing so.


Solution

  • Matrix Multiplication Dimension Compatibility

    The hidden layer weight matrix was W1 = np.array([[-20],[-20]]), with a shape of (2, 1). The first dot product is actually fine — np.dot(X, self.W) with X of shape (4, 2) and W1 of shape (2, 1) yields a (4, 1) hidden activation. The failure happens at the output layer: np.dot(z, self.W) is then called with z of shape (4, 1) and W2 of shape (2, 1), and the inner dimensions (1 vs. 2) do not align, which raises the error. A single hidden unit also cannot represent XOR; the hidden layer needs two units, i.e. a (2, 2) weight matrix.

    Code:

    import numpy as np


    def sigmoid(x):
        """Logistic activation, applied element-wise."""
        return 1 / (1 + np.exp(-x))


    class Layer:
        """One fully connected layer: activate(X) = sigmoid(X @ W + b).

        W has shape (inputs, units); X has shape (batch, inputs).
        """

        def __init__(self, W, b):
            self.m = W.shape[0]  # number of inputs the layer expects
            self.n = W.shape[1]  # number of units (outputs)
            self.W = W
            self.b = b

        def activate(self, X):
            z = np.dot(X, self.W) + self.b
            return sigmoid(z)


    class Network:
        """Two-layer feed-forward network: output(hidden(X))."""

        def __init__(self, hidden, output):
            self.hidden = hidden
            self.output = output

        def activate(self, X):
            z_hidden = self.hidden.activate(X)
            return self.output.activate(z_hidden)


    # XOR(x1, x2) = AND(OR(x1, x2), NAND(x1, x2)):
    #   hidden unit 1:  20*x1 + 20*x2 - 10   -> OR
    #   hidden unit 2: -20*x1 - 20*x2 + 30   -> NAND
    #   output unit:    20*h1 + 20*h2 - 30   -> AND
    W1, b1 = np.array([[20, -20], [20, -20]]), np.array([[-10, 30]])
    W2, b2 = np.array([[20], [20]]), np.array([[-30]])

    hidden_layer = Layer(W1, b1)
    output_layer = Layer(W2, b2)

    xor_gate = Network(hidden_layer, output_layer)
    logic_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    xor_output = xor_gate.activate(logic_inputs)
    print(np.round(xor_output))
    

    Prints

    [[0.]
     [1.]
     [1.]
     [0.]]