I'm trying to build a simple Neural Network library from scratch similar to Keras, but I'm having issues getting the training to work properly. It's been a while since I've written a NN from scratch instead of using a library, so I thought it would be good practice.
I'm not quite sure I have the constructor set up properly for the case that no input shape is given, and I keep running into the "ValueError: shapes X and Y not aligned" issue regardless of what number of neurons I pass the layer or the input shape. Here's the traceback:
Traceback (most recent call last): File "NNfromScratch.py", line 551, in <module>
model.train(X_train, y_train, epochs=100, batch_size=10, verbose=True)
File "NNfromScratch.py", line 427, in train
File "NNfromScratch.py", line 395, in forward
self.outputs = layer.forward(self.outputs)
File "NNfromScratch.py", line 153, in forward
**self.outputs = np.dot(self.weights.T, inputs) + self.biases**
File "<__array_function__ internals>", line 6, in dot
ValueError: shapes (784,32) and (10,784) not aligned: 32 (dim 1) != 10 (dim 0)
The error is thrown from the forward function of the Dense layer.
The full (reproducible) code can be seen here.
Here's the snippet of the most important parts, though:
import time
import numpy as np
import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt
import tensorflow.keras.datasets.mnist as mnist
class Layers:
class Dense:
def __init__(self, neurons=0, activation=Activations.ReLU, inputs=0, dropout_rate=1):
# Initialize weights and biases
self.weights = np.random.randn(neurons, inputs)
self.biases = np.random.randn(1, neurons)
self.activation = activation
self.dropout_rate = dropout_rate
# Forward-Propagation
def forward(self, inputs):
self.inputs = inputs
self.outputs = np.dot(self.weights.T, inputs) + self.biases
self.outputs = self.activation(self.outputs)
self.outputs = self.dropout(self.outputs)
return self.outputs
# Backward-Propagation
def backward(self, error, learning_rate):
self.error = error
self.delta = self.error * self.activation(self.outputs)
self.delta = self.dropout(self.delta, derivative=True)
self.weights -= learning_rate * np.dot(self.delta, self.inputs.T)
self.biases -= learning_rate * np.sum(self.delta, axis=0, keepdims=True)
return self.delta
# Dropout
def dropout(self, x, derivative=False):
if derivative:
return self.dropout_rate * (1 - self.dropout_rate) * x
return self.dropout_rate * x
class NeuralNetwork:
def forward(self, inputs):
# Forward-Propagation
self.inputs = inputs
self.outputs = self.inputs
for layer in self.layers:
self.outputs = layer.forward(self.outputs)
return self.outputs
def backward(self, targets):
# Backward-Propagation
self.targets = targets
self.error = self.loss(self.outputs, self.targets)
self.delta = self.error
for layer in reversed(self.layers):
self.delta = layer.backward(self.delta, self.optimizer_kwargs)
return self.delta
def update_weights(self):
# Update weights and biases
for layer in self.layers:
def train(self, inputs, targets, epochs=1, batch_size=1, verbose=False):
self.epochs = epochs
self.epoch_errors = []
self.epoch_losses = []
self.epoch_accuracies = []
self.epoch_times = []
start = time.time()
for epoch in range(self.epochs):
epoch_start = time.time()
epoch_error = 0
epoch_loss = 0
epoch_accuracy = 0
for i in range(0, inputs.shape[0], batch_size):
batch_inputs = inputs[i:i+batch_size]
batch_targets = targets[i:i+batch_size]
epoch_error += self.error.sum()
epoch_loss += self.loss(self.outputs, self.targets).sum()
epoch_accuracy += self.accuracy(self.outputs, self.targets)
epoch_time = time.time() - epoch_start
if verbose:
print('Epoch: {}, Error: {}, Loss: {}, Accuracy: {}, Time: {}'.format(epoch, epoch_error, epoch_loss, epoch_accuracy, epoch_time))
self.train_time = time.time() - start
return self.epoch_errors, self.epoch_losses, self.epoch_accuracies, self.epoch_times
# Load and flatten data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape((X_train.shape[0], -1))
X_test = X_test.reshape((X_test.shape[0], -1))
# Build model
model = NeuralNetwork([
Layers.Dense(32, Activations.ReLU, inputs=X_train.shape[1]),
Layers.Dense(10, Activations.ReLU),
Layers.Dense(1, Activations.Softmax)
], Losses.Categorical_Cross_Entropy, Optimizers.SGD, learning_rate=0.01)
model.train(X_train, y_train, epochs=100, batch_size=10, verbose=True)
model.evaluate(X_test, y_test)
Change this line:
self.outputs = np.dot(self.weights.T, inputs) + self.biases
self.outputs = np.dot(inputs, self.weights.T) + self.biases
The reason being is that the inner dimensions need to align. Your inputs
is of shape [B,784]
(where B
is batch size) and your weights are of shape [32,784]