I am using PyTorch to implement MNIST handwritten digit recognition, but the loss does not change at all from the very first iteration. Why is that? Here is my code:
import gzip
import pickle
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
f = gzip.open("./mnist.pkl.gz", "rb")
train_data, val_data, test_data = pickle.load(f, encoding='latin1')
f.close()
# split the 50,000 training images into 250 mini-batches of 200
train_data_img = torch.tensor(train_data[0].reshape(250, 200, 784))
train_data_ans = torch.tensor(train_data[1].reshape(250, 200))
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 10)

    def forward(self, x):
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.105, momentum=0.98)
losses = []
loss = 1
for epoch in range(5):
    for data, ans in zip(train_data_img, train_data_ans):
        out = net(data)
        ans = F.one_hot(ans)
        loss = F.mse_loss(out, ans)
        optimizer.zero_grad()
        loss.backword()
        optimizer.step()
        losses.append(loss.item())
print(losses)
xlabel = np.linspace(0, len(losses), len(losses))
plt.plot(xlabel, losses)
plt.show()
I also find that every input produces the same output. What is the problem? I have only just started learning this.
For classification you should apply an activation to the output layer, such as softmax:
def forward(self, x):
    x = F.leaky_relu(self.fc1(x))
    x = F.leaky_relu(self.fc2(x))
    x = self.fc3(x)
    return F.softmax(x, dim=1)
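One more thing: F.one_hot returns a LongTensor, and F.mse_loss requires both arguments to be floating point, so the target needs an explicit cast (passing num_classes=10 also guards against a batch that happens to be missing some digit):

target = F.one_hot(ans, num_classes=10).float()
loss = F.mse_loss(out, target)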
Also try more epochs and lower the learning rate to something like 0.001: lr=0.105 combined with momentum=0.98 is very aggressive and can easily make SGD blow up, which would explain a loss that never moves and outputs that look identical for every input. Finally, fix the loss.backword() typo; the method is called loss.backward().
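Putting these fixes together, a minimal sketch of the corrected training loop could look like the following. It keeps your MSE-on-softmax setup for clarity, although nn.CrossEntropyLoss on the raw logits is the more usual choice for classification; the momentum value and epoch count here are only illustrative:

net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # illustrative values
losses = []
for epoch in range(20):  # more epochs than the original 5
    for data, ans in zip(train_data_img, train_data_ans):
        out = net(data)  # softmax probabilities, shape (200, 10)
        target = F.one_hot(ans, num_classes=10).float()  # float target for MSE
        loss = F.mse_loss(out, target)
        optimizer.zero_grad()
        loss.backward()  # backward, not backword
        optimizer.step()
        losses.append(loss.item())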