I wanted to create a neural network to predict the hypotenuse of a right triangle given the other two sides. To do this, I use the Pythagorean theorem to generate 10,000 values that are used to train the model. The problem is that even though my average loss is 0.18, the accuracy is 0%. What am I doing wrong?
import math
from random import randint

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from tqdm import tqdm

class SimpleMLP(nn.Module):
    def __init__(self, num_of_classes=10):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(2, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            # Output matches input and number of classes
            nn.Linear(64, num_of_classes),
        )

    def forward(self, x):
        return self.layers(x)
class PythagoreanDataset(Dataset):
    def __init__(self, transform=None):
        self.values = self._get_pythagorean_values()

    def __getitem__(self, index):
        a, b, c = self.values[index]
        label = torch.as_tensor([c], dtype=torch.float)
        data = torch.as_tensor([a, b], dtype=torch.float)
        return data, label

    def __len__(self):
        return len(self.values)

    def _get_pythagorean_values(self, array_size: int = 10000) -> list:
        values = []
        for i in range(array_size):
            a = float(randint(1, 500))
            b = float(randint(1, 500))
            c = math.sqrt(pow(a, 2) + pow(b, 2))
            values.append((a, b, c))
        return values
def _correct(output, target):
    predicted_digits = output.argmax(1)  # pick digit with largest network output
    correct_ones = (predicted_digits == target).type(
        torch.float
    )  # 1.0 for correct, 0.0 for incorrect
    return correct_ones.sum().item()
def train(
    data_loader: DataLoader,
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
):
    model.train()
    num_batches = len(data_loader)
    num_items = len(data_loader.dataset)
    train_loss = 0
    total_loss = 0
    total_correct = 0
    for data, target in data_loader:
        # Copy data and targets to device
        data = data.to(device)
        target = target.to(device)
        # Do a forward pass
        output = model(data)
        # Calculate the loss
        loss = criterion(output, target)
        total_loss += loss
        # Count number of correct digits
        total_correct += _correct(output, target)
        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss = float(total_loss / num_batches)
    accuracy = total_correct / num_items
    print(f"Train accuracy: {accuracy:.2%}, Average loss: {train_loss:7f}")
    return train_loss
def test(
    test_loader: DataLoader,
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    device: torch.device,
):
    model.eval()
    num_batches = len(test_loader)
    num_items = len(test_loader.dataset)
    test_loss = 0
    total_correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Copy data and targets to device
            data = data.to(device)
            target = target.to(device)
            # Do a forward pass
            output = model(data)
            # Calculate the loss
            loss = criterion(output, target)
            test_loss += loss.item()
            # Count number of correct digits
            total_correct += _correct(output, target)
    test_loss = test_loss / num_batches
    accuracy = total_correct / num_items
    print(f"Test accuracy: {100*accuracy:>0.1f}%, average loss: {test_loss:>7f}")
    return test_loss
def main():
    device = "cpu"
    dataset = PythagoreanDataset()
    # Creating data indices for training and validation splits:
    validation_split = 0.2
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    train_indices, val_indices = indices[split:], indices[:split]
    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=valid_sampler)
    model = SimpleMLP(num_of_classes=1).to(device)
    print(model)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    epochs = 500
    losses = []
    for epoch in tqdm(range(epochs)):
        print(f"Training epoch: {epoch+1}")
        train_loss = train(train_loader, model, criterion, optimizer, device=device)
        test_loss = test(test_loader, model, criterion, device=device)
        losses.append((train_loss, test_loss))
    plot_loss_curves(losses=losses)
    # Example prediction
    test_input = torch.tensor([[3, 4]], dtype=torch.float32)
    predicted_output = model(test_input)
    print("Predicted hypotenuse:", predicted_output.item())
---
There are a number of things about your code that don't make sense, but I think the _correct function is the cause of the accuracy issue.
You create your model with

model = SimpleMLP(num_of_classes=1)

As designed, your model takes an input of size (bs, 2) and produces an output of size (bs, 1).
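You can verify the shapes directly:

model = SimpleMLP(num_of_classes=1)
x = torch.randn(32, 2)   # a batch of 32 (a, b) pairs
print(model(x).shape)    # torch.Size([32, 1])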
Now look at your _correct function:
def _correct(output, target):
    predicted_digits = output.argmax(1)  # pick digit with largest network output
    correct_ones = (predicted_digits == target).type(
        torch.float
    )  # 1.0 for correct, 0.0 for incorrect
    return correct_ones.sum().item()
The line predicted_digits = output.argmax(1) makes no sense here: you are taking the argmax over an axis of size 1, which returns 0 for every row:

output = torch.randn(8, 1)
output.argmax(1)
> tensor([0, 0, 0, 0, 0, 0, 0, 0])
Taking the argmax across class scores is what you would do for a classification problem, but that isn't what you're doing here.
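For an actual classifier, the output would have one column per class and argmax(1) would pick the highest-scoring class for each row. A quick illustration (the logits here are random, so the predicted classes are just examples):

logits = torch.randn(4, 10)   # (bs, num_classes) output of a 10-class classifier
predicted = logits.argmax(1)  # shape (4,), e.g. tensor([3, 7, 0, 9])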
After that, the comparison correct_ones = (predicted_digits == target) also doesn't make sense. Your model is a regression model producing a floating-point output, and it's highly unlikely to ever match the target exactly (e.g. 4.000000001 != 4).
Based on this, I would expect the _correct function to return 0.0 for every batch, which is exactly the 0% accuracy you're seeing.
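If you still want an accuracy-style number for this regression task, one option is to count a prediction as correct when it lands within some tolerance of the target. A minimal sketch (the 1% tolerance is an arbitrary choice, not something from your code):

def _correct(output, target, rel_tol=0.01):
    # Treat a prediction as "correct" if it is within rel_tol (here 1%)
    # of the target value.
    close = torch.isclose(output, target, rtol=rel_tol)
    return close.type(torch.float).sum().item()

Since output and target both have shape (bs, 1), no argmax is needed; this just counts how many predictions in the batch fall inside the tolerance.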