Search code examples

Am I implementing my perceptron with backpropagation correctly?

I'm learning about perceptrons in class and how to use backpropagation to train the model. I'm currently having trouble with my implementation because it only gives me a 50% accuracy rate on the data I'm given, while the majority of others in my class are getting a 90% rate. Is there something I'm overlooking in my implementation? This is what I have so far, based on the sources I've looked at.

class Perceptron():
    """Single-layer perceptron with a step activation.

    Weights and bias are adjusted with the perceptron learning rule:
    each parameter moves by ``learning_rate * (target - prediction)``
    scaled by the corresponding input (or by 1 for the bias).
    """

    def __init__(self, num_features):
        """Create a perceptron expecting ``num_features`` inputs per sample."""
        self.num_features = num_features
        # Small random starting weights, drawn uniformly from [0, 0.1).
        self.weights = np.random.rand(num_features) * 0.1
        self.bias = 0.0
        # Default step size so backward() works even before train() is
        # called; train() replaces it with its own learning_rate argument.
        self.learning_rate = 0.01

    def forward(self, x):
        """Return 0/1 predictions for ``x``.

        ``x`` may be one sample of shape (num_features,) or a batch of
        shape (n_samples, num_features); the result is a 0-d array or an
        array of length n_samples respectively.
        """
        linear = np.dot(x, self.weights) + self.bias
        # Step activation: class 1 only for a strictly positive activation.
        predictions = np.where(linear > 0, 1, 0)
        return predictions

    def backward(self, x, y, predictions):
        """Apply one perceptron-rule update and return ``y - predictions``.

        Works for a single sample (1-D ``x``, scalar ``y``) and for a batch
        (2-D ``x``, 1-D ``y``).
        """
        errors = y - predictions
        # For a batch, x.T @ errors sums the error-weighted feature vectors;
        # for a single 1-D sample it reduces to x * error.
        self.weights += self.learning_rate * np.dot(x.T, errors)
        self.bias += self.learning_rate * np.sum(errors)
        return errors

    def train(self, x, y, epochs, learning_rate = 0.01):
        """Run ``epochs`` passes over the data, updating after every sample."""
        self.learning_rate = learning_rate
        for _ in range(epochs):
            for x_i, y_i in zip(x, y):
                prediction = self.forward(x_i)
                self.backward(x_i, y_i, prediction)

    def evaluate(self, x, y):
        """Return the fraction of samples in ``x`` whose prediction equals ``y``."""
        predictions = self.forward(x)
        accuracy = np.mean(predictions == y)
        return accuracy

So far, I've tried different learning rates and asking others in my class, which to be quite honest, hasn't really changed the outcome of my implementation. I'm expecting a ~90% accuracy rate, but I'm only getting a 50% accuracy rate.

Here is some sample data:

0.77    -1.14   0
-0.33   1.44    0
0.91    -3.07   0
-0.37   -1.91   0
-1.84   -1.13   0
-1.50   0.34    0
-0.63   -1.53   0
-1.08   -1.23   0
0.39    -1.99   0
-1.26   -2.90   0
-5.27   -0.78   0
-0.49   -2.74   0
1.48    -3.74   0
-1.64   -1.96   0
0.45    0.36    0
-1.48   -1.17   0
-2.94   -4.47   0
-2.19   -1.48   0
0.02    -0.02   0
-2.24   -2.12   0
-3.17   -3.69   0
-4.09   1.03    0
-2.41   -2.31   0
-3.45   -0.61   0
-3.96   -2.00   0
-2.95   -1.16   0
-2.42   -3.35   0
-1.74   -1.10   0
-1.61   -1.28   0
-2.59   -2.21   0
-2.64   -2.20   0
-2.84   -4.12   0
-1.45   -2.26   0
-3.98   -1.05   0
-2.97   -1.63   0
-0.68   -1.52   0
-0.10   -3.43   0
-1.14   -2.66   0
-2.92   -2.51   0
-2.14   -1.62   0
-3.33   -0.44   0
-1.05   -3.85   0
0.38    0.95    0
-0.05   -1.95   0
-3.20   -0.22   0
-2.26   0.01    0
-1.41   -0.33   0
-1.20   -0.71   0
-1.69   0.80    0
-1.52   -1.14   0
3.88    0.65    1
0.73    2.97    1
0.83    3.94    1
1.59    1.25    1
3.92    3.48    1
3.87    2.91    1
1.14    3.91    1
1.73    2.80    1
2.95    1.84    1
2.61    2.92    1
2.38    0.90    1
2.30    3.33    1
1.31    1.85    1
1.56    3.85    1
2.67    2.41    1
1.23    2.54    1
1.33    2.03    1
1.36    2.68    1
2.58    1.79    1
2.40    0.91    1
0.51    2.44    1
2.17    2.64    1
4.38    2.94    1
1.09    3.12    1
0.68    1.54    1
1.93    3.71    1
1.26    1.17    1
1.90    1.34    1
3.13    0.92    1
0.85    1.56    1
1.50    3.93    1
2.95    2.09    1
0.77    2.84    1
1.00    0.46    1
3.19    2.32    1
2.92    2.32    1
2.86    1.35    1
0.97    2.68    1
1.20    1.31    1
1.54    2.02    1
1.65    0.63    1
1.36    -0.22   1
2.63    0.40    1
0.90    2.05    1
1.26    3.54    1
0.71    2.27    1
1.96    0.83    1
2.52    1.83    1
2.77    2.82    1
4.16    3.34    1

Before using the perceptron model, the data is first shuffled and then split into 2 parts: 2/3 of the original data for training and the other 1/3 for testing. After that, z-score standardization is performed on the first 2 features of the training and testing datasets.

This is how I'm using the class:

# Fit a two-feature perceptron: the first two columns of combined_x_train
# are passed as inputs and its third column as the targets.
perceptron = Perceptron(num_features=2)
perceptron.train(
    x=combined_x_train[:, :2],
    y=combined_x_train[:, 2],
    epochs=5,
    learning_rate=0.1,
)
# NOTE(review): training reads combined_x_train while scoring reads
# x_train / y_train -- confirm both hold the same standardized samples.
accuracy = perceptron.evaluate(x=x_train, y=y_train)
print(f'Final Accuracy: {accuracy * 100:.2f}%')


  • Here is my implementation :

    import numpy as np
    from sklearn.model_selection import train_test_split
    # Sample data provided: one row per sample as [feature_1, feature_2, label].
    # The first 50 rows carry label 0 and the last 50 rows carry label 1.
    data = np.array([
        [0.77, -1.14, 0], [-0.33, 1.44, 0], [0.91, -3.07, 0], [-0.37, -1.91, 0],
        [-1.84, -1.13, 0], [-1.50, 0.34, 0], [-0.63, -1.53, 0], [-1.08, -1.23, 0],
        [0.39, -1.99, 0], [-1.26, -2.90, 0], [-5.27, -0.78, 0], [-0.49, -2.74, 0],
        [1.48, -3.74, 0], [-1.64, -1.96, 0], [0.45, 0.36, 0], [-1.48, -1.17, 0],
        [-2.94, -4.47, 0], [-2.19, -1.48, 0], [0.02, -0.02, 0], [-2.24, -2.12, 0],
        [-3.17, -3.69, 0], [-4.09, 1.03, 0], [-2.41, -2.31, 0], [-3.45, -0.61, 0],
        [-3.96, -2.00, 0], [-2.95, -1.16, 0], [-2.42, -3.35, 0], [-1.74, -1.10, 0],
        [-1.61, -1.28, 0], [-2.59, -2.21, 0], [-2.64, -2.20, 0], [-2.84, -4.12, 0],
        [-1.45, -2.26, 0], [-3.98, -1.05, 0], [-2.97, -1.63, 0], [-0.68, -1.52, 0],
        [-0.10, -3.43, 0], [-1.14, -2.66, 0], [-2.92, -2.51, 0], [-2.14, -1.62, 0],
        [-3.33, -0.44, 0], [-1.05, -3.85, 0], [0.38, 0.95, 0], [-0.05, -1.95, 0],
        [-3.20, -0.22, 0], [-2.26, 0.01, 0], [-1.41, -0.33, 0], [-1.20, -0.71, 0],
        [-1.69, 0.80, 0], [-1.52, -1.14, 0], [3.88, 0.65, 1], [0.73, 2.97, 1],
        [0.83, 3.94, 1], [1.59, 1.25, 1], [3.92, 3.48, 1], [3.87, 2.91, 1],
        [1.14, 3.91, 1], [1.73, 2.80, 1], [2.95, 1.84, 1], [2.61, 2.92, 1],
        [2.38, 0.90, 1], [2.30, 3.33, 1], [1.31, 1.85, 1], [1.56, 3.85, 1],
        [2.67, 2.41, 1], [1.23, 2.54, 1], [1.33, 2.03, 1], [1.36, 2.68, 1],
        [2.58, 1.79, 1], [2.40, 0.91, 1], [0.51, 2.44, 1], [2.17, 2.64, 1],
        [4.38, 2.94, 1], [1.09, 3.12, 1], [0.68, 1.54, 1], [1.93, 3.71, 1],
        [1.26, 1.17, 1], [1.90, 1.34, 1], [3.13, 0.92, 1], [0.85, 1.56, 1],
        [1.50, 3.93, 1], [2.95, 2.09, 1], [0.77, 2.84, 1], [1.00, 0.46, 1],
        [3.19, 2.32, 1], [2.92, 2.32, 1], [2.86, 1.35, 1], [0.97, 2.68, 1],
        [1.20, 1.31, 1], [1.54, 2.02, 1], [1.65, 0.63, 1], [1.36, -0.22, 1],
        [2.63, 0.40, 1], [0.90, 2.05, 1], [1.26, 3.54, 1], [0.71, 2.27, 1],
        [1.96, 0.83, 1], [2.52, 1.83, 1], [2.77, 2.82, 1], [4.16, 3.34, 1],
    ])
    # Columns 0-1 of each row are the input features; column 2 is the label.
    X, y = data[:, :2], data[:, 2]
    # Hold out 33% of the samples for testing; the fixed random_state makes
    # the shuffle-and-split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42
    )
    # Perceptron class definition
    class Perceptron():
        """Single-layer perceptron with a step activation, trained in full batches.

        Each epoch predicts every sample with the current parameters and then
        applies one perceptron-rule update built from all of the errors.
        """

        def __init__(self, num_features):
            """Create a perceptron expecting ``num_features`` inputs per sample."""
            self.num_features = num_features
            # Small random starting weights, drawn uniformly from [0, 0.1).
            self.weights = np.random.rand(num_features) * 0.1
            self.bias = 0.0
            # Default step size so backward() works even before train() is
            # called; train() replaces it with its own learning_rate argument.
            self.learning_rate = 0.01

        def forward(self, x):
            """Return 0/1 predictions for a sample or a batch of samples ``x``."""
            # Compute the linear output and apply the step function: class 1
            # only for a strictly positive activation.
            linear = np.dot(x, self.weights) + self.bias
            predictions = np.where(linear > 0, 1, 0)
            return predictions

        def backward(self, x, y, predictions):
            """Apply one perceptron-rule update and return ``y - predictions``."""
            # Calculate the error
            errors = y - predictions
            # Update weights and bias using the perceptron rule; x.T @ errors
            # sums the error-weighted feature vectors over the batch.
            self.weights += self.learning_rate * np.dot(x.T, errors)
            self.bias += self.learning_rate * np.sum(errors)
            return errors

        def train(self, x, y, epochs, learning_rate=0.01):
            """Run ``epochs`` full-batch updates over ``x`` and ``y``."""
            self.learning_rate = learning_rate
            for _ in range(epochs):
                # Calculate predictions for entire batch
                predictions = self.forward(x)
                # Update weights and bias after processing the entire batch
                self.backward(x, y, predictions)

        def evaluate(self, x, y):
            """Return the fraction of samples in ``x`` whose prediction equals ``y``."""
            # Predict on the supplied data
            predictions = self.forward(x)
            # Calculate accuracy
            accuracy = np.mean(predictions == y)
            return accuracy
    # Fit the perceptron on the training split, then score both splits.
    perceptron = Perceptron(num_features=2)
    perceptron.train(x=X_train, y=y_train, epochs=100, learning_rate=0.01)
    accuracy = perceptron.evaluate(x=X_test, y=y_test)
    accuracy_train = perceptron.evaluate(x=X_train, y=y_train)
    # Bare expression: a notebook/REPL echoes (test accuracy, train accuracy);
    # it has no effect when run as a script.
    accuracy, accuracy_train

    I use train_test_split to split the data before training so that I can test the class quickly. Here are the changes I made to your code:

    • Batch Training: The training is now using the entire batch for weight updates.
    • Predictions Shape: The forward function has been adapted for batch input.
    • Learning Rate and Epochs: Adjust the number of epochs and learning rate for convergence.