I want my neural network to solve a polynomial regression problem like y = (x*x) + 2x - 3.
So right now I created a network with 1 input node, 100 hidden nodes and 1 output node, and gave it a lot of epochs to train on a large data set. The problem is that the prediction after about 20000 epochs is okay-ish, but much worse than the linear regression predictions after training.
import torch
from torch import Tensor
from torch.nn import Linear, MSELoss, functional as F
from torch.optim import SGD, Adam, RMSprop
from torch.autograd import Variable
import numpy as np

# define our data generation function
def data_generator(data_size=1000):
    # f(x) = y = x^2 + 4x - 3
    inputs = []
    labels = []

    # loop data_size times to generate the data
    for ix in range(data_size):
        # generate a random number between 0 and 1
        x = np.random.randint(1000) / 1000
        # calculate the y value using the function x^2 + 4x - 3
        y = (x * x) + (4 * x) - 3
        # append the values to our input and labels lists
        inputs.append([x])
        labels.append([y])

    return inputs, labels

# define the model
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = Linear(1, 100)
        self.fc2 = Linear(100, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = Net()

# define the loss function
criterion = MSELoss()
# define the optimizer
optimizer = SGD(model.parameters(), lr=0.01)

# define the number of epochs and the data set size
nb_epochs = 20000
data_size = 1000

# create our training loop
for epoch in range(nb_epochs):
    X, y = data_generator(data_size)
    X = Variable(Tensor(X))
    y = Variable(Tensor(y))

    y_pred = model(X)
    loss = criterion(y_pred, y)
    epoch_loss = loss.data

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print("Epoch: {} Loss: {}".format(epoch, epoch_loss))

# test the model
model.eval()
test_data = data_generator(1)

prediction = model(Variable(Tensor(test_data[0][0])))
print("Prediction: {}".format(prediction.data[0]))
print("Expected: {}".format(test_data[1][0]))
Is there a way to get much better results? I wondered if I should try to get 3 outputs, call them a, b and c, such that y = a(x*x) + b(x) + c. But I have no idea how to implement that and train my neural network.
For this problem, it might be much easier if you consider the Net() with a single Linear layer as linear regression, with input features [x^2, x].
import torch
from torch import Tensor
from torch.nn import Linear, MSELoss, functional as F
from torch.optim import SGD, Adam, RMSprop
from torch.autograd import Variable
import numpy as np

# define our data generation function
def data_generator(data_size=1000):
    # f(x) = y = x^2 + 4x - 3
    inputs = []
    labels = []

    # loop data_size times to generate the data
    for ix in range(data_size):
        # generate a random number between 0 and 2
        x = np.random.randint(2000) / 1000  # I edited here for you
        # calculate the y value using the function x^2 + 4x - 3
        y = (x * x) + (4 * x) - 3
        # append the features [x^2, x] and the label to our lists
        inputs.append([x * x, x])
        labels.append([y])

    return inputs, labels

# define the model
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = Linear(2, 1)

    def forward(self, x):
        return self.fc1(x)

model = Net()
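The log below presumably comes from training this model with the same loop as in the question; here is a minimal sketch of that setup (the loss/optimizer choice and the every-1000-epoch print are assumptions made to match the log):

criterion = MSELoss()
optimizer = SGD(model.parameters(), lr=0.01)

for epoch in range(5000):
    X, y = data_generator(data_size=1000)
    X, y = Tensor(X), Tensor(y)   # features are now [x^2, x]
    y_pred = model(X)             # forward pass through the single Linear(2, 1) layer
    loss = criterion(y_pred, y)
    optimizer.zero_grad()         # clear gradients from the previous step
    loss.backward()
    optimizer.step()
    if epoch % 1000 == 0:         # log every 1000 epochs
        print("Epoch: {} Loss: {}".format(epoch, loss.item()))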
Epoch: 0 Loss: 33.75775909423828
Epoch: 1000 Loss: 0.00046704441774636507
Epoch: 2000 Loss: 9.437128483114066e-07
Epoch: 3000 Loss: 2.0870876138445738e-09
Epoch: 4000 Loss: 1.126847400112485e-11
Prediction: 5.355223655700684
Expected: [5.355224999999999]
The coefficients a, b, c you are looking for are actually the weight and bias of self.fc1:
print('a & b:', model.fc1.weight)
print('c:', model.fc1.bias)
# Output
a & b: Parameter containing:
tensor([[1.0000, 4.0000]], requires_grad=True)
c: Parameter containing:
tensor([-3.0000], requires_grad=True)
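If you want a, b, c as plain Python numbers (for example to plug back into y = a*x^2 + b*x + c), something like this should work; detach(), flatten(), tolist() and item() are standard tensor methods:

a, b = model.fc1.weight.detach().flatten().tolist()  # weight has shape (1, 2) and holds [a, b]
c = model.fc1.bias.item()                            # bias has shape (1,) and holds c
print(a, b, c)                                       # roughly 1.0, 4.0, -3.0 after training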
In only 5000 epochs, everything converges: a -> 1, b -> 4, and c -> -3.
The model is so lightweight, with only 3 parameters instead of:
(1 * 100 + 100) + (100 * 1 + 1) = 301 parameters in the old model
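If you want to verify those counts, summing numel() over model.parameters() is a quick check (a small sketch):

print(sum(p.numel() for p in model.parameters()))
# 3 for the Linear(2, 1) model; the old Net with Linear(1, 100) + Linear(100, 1) gives 301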
Hope this helps you!