Implementing a Custom Neural Network from Scratch in Python

I'm interested in delving deep into neural network architectures and want to challenge myself by implementing a custom neural network from scratch in Python, without relying on external libraries like TensorFlow or PyTorch. Can someone provide guidance on how to approach this task? I'm particularly interested in understanding the underlying mathematics and algorithms involved, as well as any optimizations for performance and efficiency. Additionally, are there any resources or tutorials available that can help me in this endeavor? Thank you for your assistance!

I would like an explanation of the basic, necessary foundations and of what the code is supposed to look like.

Solution

Here's a general approach you can follow:

1. Understand the basics
2. Design the architecture
3. Implement forward propagation
4. Implement backpropagation
5. Choose optimization techniques
6. Evaluate and fine-tune
7. Optimize for efficiency
8. Test with benchmark datasets

import numpy as np

class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Both the hidden layer and the output layer apply the sigmoid function, so
    predictions fall between 0 and 1 and targets should be scaled to that
    range. ``backward`` performs one full-batch gradient-descent step on the
    squared error ``0.5 * sum((targets - output) ** 2)``.

    Typical use is to alternate ``forward(inputs)`` and
    ``backward(inputs, targets, learning_rate)`` for a number of epochs.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the network and initialise its parameters.

        Args:
            input_size: Number of features per input sample.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of values produced per sample.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weights are drawn from a standard normal distribution so that the
        # hidden units start out different from each other; biases start at 0.
        self.weights_input_hidden = np.random.randn(input_size, hidden_size)
        self.biases_input_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.randn(hidden_size, output_size)
        self.biases_hidden_output = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        The argument is clipped to [-500, 500] before exponentiation so that
        ``np.exp`` cannot overflow; the sigmoid is already saturated far
        inside that range, so the result is effectively unchanged.
        """
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid's *output*.

        ``x`` must already be an activation ``s = sigmoid(z)``; the derivative
        with respect to ``z`` is then ``s * (1 - s)``.
        """
        return x * (1 - x)

    def forward(self, inputs):
        """Run a forward pass and cache the activations for ``backward``.

        Args:
            inputs: Array of shape (n_samples, input_size).

        Returns:
            Array of shape (n_samples, output_size) holding the sigmoid
            activations of the output layer.
        """
        self.hidden_input = np.dot(inputs, self.weights_input_hidden) + self.biases_input_hidden
        self.hidden_output = self.sigmoid(self.hidden_input)
        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.biases_hidden_output
        # The output layer must be squashed as well: backward() multiplies the
        # output error by sigmoid_derivative(self.output), which is only the
        # correct gradient when self.output is a sigmoid activation.
        self.output = self.sigmoid(self.output_input)
        return self.output

    def backward(self, inputs, targets, learning_rate):
        """Backpropagate the error of the last ``forward`` call and update parameters.

        ``forward`` must have been called with the same ``inputs`` immediately
        before, because this method reads the cached ``self.hidden_output``
        and ``self.output``.

        Args:
            inputs: Array of shape (n_samples, input_size).
            targets: Array of shape (n_samples, output_size).
            learning_rate: Step size applied to every parameter update.
        """
        # Negative gradient of the squared error w.r.t. the output
        # pre-activation: (targets - output) * sigmoid'(output).
        output_error = targets - self.output
        output_delta = output_error * self.sigmoid_derivative(self.output)

        # Propagate the delta back through the (not yet updated) output weights.
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_output)

        # The deltas already carry the minus sign of the gradient, so adding
        # them moves the parameters downhill on the loss.
        self.weights_hidden_output += np.dot(self.hidden_output.T, output_delta) * learning_rate
        self.biases_hidden_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
        self.weights_input_hidden += np.dot(inputs.T, hidden_delta) * learning_rate
        self.biases_input_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

# Example usage: fit the network to the XOR truth table.

# Layer widths: two input features, three hidden units, one output value.
input_size, hidden_size, output_size = 2, 3, 1

# Build the network (its weights are drawn randomly in the constructor).
nn = NeuralNetwork(input_size, hidden_size, output_size)

# Training set, one sample per row: targets[i] is the XOR of the two
# entries of inputs[i].
inputs = np.array([[0, 1], [1, 0], [1, 1], [0, 0]])
targets = np.array([[1], [1], [0], [0]])

# Full-batch training: each epoch runs one forward pass over all four
# samples followed by one parameter update.
epochs, learning_rate = 10000, 0.1
for epoch in range(epochs):
    # forward() has to run first; backward() reads the activations it
    # stores on the network object.
    output = nn.forward(inputs)
    nn.backward(inputs, targets, learning_rate)

# Run the trained network on two of the training rows and show its outputs.
test_inputs = np.array([[1, 1], [0, 0]])
predictions = nn.forward(test_inputs)
print("Predictions:", predictions)