I try to train my first simple neural network. I downloaded dataset from kaggle with already written on tensorflow model. I try to repeat this model on pytorch, but my network obviously not training and prediction does not change from example to example. Please help me. What wrong I do?
My model
class NeuralNetwork(nn.Module):
def __init__(self):
super(NeuralNetwork, self).__init__()
self.conv_2d_1 = Conv2d(3, out_channels=16, kernel_size=16, padding=0)
self.pooling1 = MaxPool2d((2, 2))
self.conv_2d_2 = Conv2d(16, out_channels=32, kernel_size=32, padding=0)
self.pooling2 = MaxPool2d((2, 2))
self.conv_2d_3 = Conv2d(32, out_channels=64, kernel_size=16, padding=0)
self.pooling3 = MaxPool2d((2, 2))
self.l1 = Linear(64*25, 128)
self.l2 = Linear(128, 2)
self.sigmoid = Sigmoid()
self.dropout = Dropout(0.2)
self.relu = ReLU()
self.softmax = softmax
self.flatten = tr.nn.Flatten()
def forward(self, x):
x.requires_grad = True
x = self.conv_2d_1(x)
x = self.relu(self.pooling1(x))
x = self.conv_2d_2(x)
x = self.relu(self.pooling2(x))
x = self.conv_2d_3(x)
x = self.relu(self.pooling3(x))
x = self.flatten(x)
x = x.reshape(64*25)
x = self.l1(x)
x = self.l2(x)
return x
Optimizer and loss function
loss_fn = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-3)
Training cycle
from matplotlib import pyplot as plt
test1 = tr.Tensor([0, 1]).to('cuda:1')
test2 = tr.Tensor([1, 0]).to('cuda:1')
epochs_number = 10
loss_data = np.zeros((train_clear.shape[0] + train_ill.shape[0]) * epochs_number)
loss_data_cursor = 0
model.train()
for k in range(epochs_number):
i = 0
while i < max(train_clear.shape[0], train_ill.shape[0]) + 1:
try:
optimizer.zero_grad()
prediction = model(tr.Tensor(train_clear[i]).to('cuda:1'))
loss = loss_fn(prediction, test1)
loss.backward()
optimizer.step()
# print(loss, prediction, test1)
loss_item = loss.item()
loss_data[loss_data_cursor] = loss_item
loss_data_cursor += 1
except:
pass
try:
prediction = model(tr.Tensor(train_ill[i]).to('cuda:1'))
loss = loss_fn(prediction, test2)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# print(loss, prediction, test1)
loss_item = loss.item()
loss_data[loss_data_cursor] = loss_item
loss_data_cursor += 1
except:
pass
i += 1
plt.plot(loss_data)
plt.show()
Training loss
The test
model.train(mode=False)
prediction1 = model(tr.Tensor(train_clear[0]).to('cuda:1'))
prediction2 = model(tr.Tensor(train_ill[5]).to('cuda:1'))
prediction1, prediction2, model.parameters(), test_clear[0].shape
rand = tr.randn(test_clear[0].shape).to('cuda:1')
prediction3 = model(rand)
prediction1, prediction2, prediction3, model.parameters(), test_clear[0].shape
Test results
(tensor([ 0.2431, -0.3565], device='cuda:1', grad_fn=<AddBackward0>),
tensor([ 0.2431, -0.3565], device='cuda:1', grad_fn=<AddBackward0>),
tensor([ 58.9911, -75.1154], device='cuda:1', grad_fn=<AddBackward0>),
<generator object Module.parameters at 0x00000183A4845970>,
(3, 180, 180))
I finded solution. My problem was a using of torch.nn.ReLU instead of torch.nn.functional.relu. I finded that it isn`t equivalent methods and my problem was solved when I replace it.