I'm trying to fit a small dataset (just 7x1 in size) with a 3-layer perceptron model, but the loss won't converge. I'm new to machine learning; can someone please give me a hint on how to adjust my code?
import torch
import torch.nn as nn
import torch.nn.functional as F

vec_shape = [7, 1]
x_0 = [500, 1000, 2000, 4000, 5000, 8000, 10000]
y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]

# x = torch.tensor(x_0).reshape(vec_shape).float()
x = torch.log(torch.tensor(x_0).reshape(vec_shape).float())
y = torch.tensor(y_0).reshape(vec_shape).float()

class Net(nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden1 = nn.Linear(n_input, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, input):
        out = self.hidden1(input)
        out = F.relu(out)
        out = self.hidden2(out)
        out = torch.sigmoid(out)
        out = self.predict(out)
        return out

    def weight_init(self):
        for op in self.modules():
            if isinstance(op, nn.Linear):
                nn.init.normal_(op.weight.data)
                nn.init.normal_(op.bias.data)

net = Net(1, 10, 1)
net.weight_init()
# print(net)

optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
loss_func = torch.nn.MSELoss()

for t in range(500):
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 50 == 0:
        print('Loss = %.4f' % loss.data)
I tried expanding the model and shrinking it, but neither change helped.
Rescaling and normalization are key in machine learning. Your setup is pretty good and you already apply some rescaling, but it simply isn't enough: with the limited number of data points you have, the range of the targets is far too large. So, just as you do with x_0, apply torch.log to y_0 as well. You can always scale the predictions back after training.
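To see why, compare the spread of the targets before and after the log transform. A quick check using the y_0 values from the question:

import torch

y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]
y_raw = torch.tensor(y_0)

# The raw targets span a factor of roughly 420 ...
print(y_raw.min().item(), y_raw.max().item())                        # ~1.17 to ~492.26
# ... while their logs sit in a narrow band that MSE handles much better.
print(torch.log(y_raw).min().item(), torch.log(y_raw).max().item())  # ~0.16 to ~6.20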
Below you can find the adapted code. I changed two things:

- torch.log on y_0
- the learning rate, lowered from 0.1 to 0.01 (with the number of iterations raised to 50000 so the smaller steps still converge)
import torch
import torch.nn as nn
import torch.nn.functional as F

vec_shape = [7, 1]
x_0 = [500, 1000, 2000, 4000, 5000, 8000, 10000]
y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]

# x = torch.tensor(x_0).reshape(vec_shape).float()
x = torch.log(torch.tensor(x_0).reshape(vec_shape).float())
y = torch.log(torch.tensor(y_0).reshape(vec_shape).float())  # modified

class Net(nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden1 = nn.Linear(n_input, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, input):
        out = self.hidden1(input)
        out = F.relu(out)
        out = self.hidden2(out)
        out = torch.sigmoid(out)
        out = self.predict(out)
        return out

    def weight_init(self):
        for op in self.modules():
            if isinstance(op, nn.Linear):
                nn.init.normal_(op.weight.data)
                nn.init.normal_(op.bias.data)

net = Net(1, 10, 1)
net.weight_init()
# print(net)

optimizer = torch.optim.SGD(net.parameters(), lr=0.01)  # modified
loss_func = torch.nn.MSELoss()

for t in range(50000):  # modified
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 50 == 0:
        print('Loss = %.4f' % loss.data)

print(torch.exp(net(x)))  # added
I also recommend normalizing your dataset after the logarithmic rescaling, for instance by subtracting the mean and dividing by the standard deviation.
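A minimal sketch of that standardization, reusing y_0, vec_shape, net, and x from the code above (the names y_log, y_mean, y_std, y_norm, and pred are just illustrative, and this assumes you train the network against y_norm instead of y):

import torch

y_log = torch.log(torch.tensor(y_0).reshape(vec_shape).float())
y_mean, y_std = y_log.mean(), y_log.std()
y_norm = (y_log - y_mean) / y_std  # zero-mean, unit-variance targets

# ... run the training loop above with y_norm as the target ...

# Invert both steps to read the predictions off in the original scale.
pred = torch.exp(net(x) * y_std + y_mean)
print(pred)

Standardized targets keep the initial MSE gradients at a sane magnitude, which often lets you get away with a larger learning rate and fewer iterations.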