I wanted to create a neural network to predict the hypotenuse of a right triangle given the other two sides. To do this, I use the Pythagorean theorem to generate 10,000 values that are used to train the model. The problem is that even though my average loss is 0.18, the accuracy is 0%. What am I doing wrong?
import math
from random import randint

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from tqdm import tqdm

class SimpleMLP(nn.Module):
    def __init__(self, num_of_classes=10):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(2, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            # Output matches input and number of classes
            nn.Linear(64, num_of_classes),
        )

    def forward(self, x):
        return self.layers(x)
class PythagoreanDataset(Dataset):
    def __init__(self, transform=None):
        self.values = self._get_pythagorean_values()

    def __getitem__(self, index):
        a, b, c = self.values[index]
        label = torch.as_tensor([c], dtype=torch.float)
        data = torch.as_tensor([a, b], dtype=torch.float)
        return data, label

    def __len__(self):
        return len(self.values)

    def _get_pythagorean_values(self, array_size: int = 10000) -> list:
        values = []
        for i in range(array_size):
            a = float(randint(1, 500))
            b = float(randint(1, 500))
            c = math.sqrt(pow(a, 2) + pow(b, 2))
            values.append((a, b, c))
        return values
def _correct(output, target):
    predicted_digits = output.argmax(1)  # pick digit with largest network output
    correct_ones = (predicted_digits == target).type(
        torch.float
    )  # 1.0 for correct, 0.0 for incorrect
    return correct_ones.sum().item()
def train(
    data_loader: DataLoader,
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
):
    model.train()
    num_batches = len(data_loader)
    num_items = len(data_loader.dataset)
    train_loss = 0
    total_loss = 0
    total_correct = 0
    for data, target in data_loader:
        # Copy data and targets to device
        data = data.to(device)
        target = target.to(device)
        # Do a forward pass
        output = model(data)
        # Calculate the loss
        loss = criterion(output, target)
        total_loss += loss
        # Count number of correct digits
        total_correct += _correct(output, target)
        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss = float(total_loss / num_batches)
    accuracy = total_correct / num_items
    print(f"Train accuracy: {accuracy:.2%}, Average loss: {train_loss:7f}")
    return train_loss
def test(
    test_loader: DataLoader,
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    device: torch.device,
):
    model.eval()
    num_batches = len(test_loader)
    num_items = len(test_loader.dataset)
    test_loss = 0
    total_correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Copy data and targets to device
            data = data.to(device)
            target = target.to(device)
            # Do a forward pass
            output = model(data)
            # Calculate the loss
            loss = criterion(output, target)
            test_loss += loss.item()
            # Count number of correct digits
            total_correct += _correct(output, target)
    test_loss = test_loss / num_batches
    accuracy = total_correct / num_items
    print(f"Test accuracy: {100*accuracy:>0.1f}%, average loss: {test_loss:>7f}")
    return test_loss
def main():
    device = "cpu"
    dataset = PythagoreanDataset()
    # Creating data indices for training and validation splits:
    validation_split = 0.2
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    train_indices, val_indices = indices[split:], indices[:split]
    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=valid_sampler)
    model = SimpleMLP(num_of_classes=1).to(device)
    print(model)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    epochs = 500
    losses = []
    for epoch in tqdm(range(epochs)):
        print(f"Training epoch: {epoch+1}")
        train_loss = train(train_loader, model, criterion, optimizer, device=device)
        test_loss = test(test_loader, model, criterion, device=device)
        losses.append((train_loss, test_loss))
    plot_loss_curves(losses=losses)
    # Example prediction
    test_input = torch.tensor([[3, 4]], dtype=torch.float32)
    predicted_output = model(test_input)
    print("Predicted hypotenuse:", predicted_output.item())
---
There are a number of things about your code that don't make sense, but I think the _correct function is the cause of the accuracy issue.
You create your model with

model = SimpleMLP(num_of_classes=1)

As designed, your model takes an input of size (bs, 2) and produces an output of size (bs, 1).
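You can verify the shapes directly:

model = SimpleMLP(num_of_classes=1)
x = torch.randn(32, 2)   # a batch of 32 (a, b) pairs
print(model(x).shape)    # torch.Size([32, 1])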
Now look at your _correct function:
def _correct(output, target):
    predicted_digits = output.argmax(1)  # pick digit with largest network output
    correct_ones = (predicted_digits == target).type(
        torch.float
    )  # 1.0 for correct, 0.0 for incorrect
    return correct_ones.sum().item()
The line predicted_digits = output.argmax(1) makes no sense here: you are taking the argmax over an axis of size 1, which returns 0 for every row:

output = torch.randn(8, 1)
output.argmax(1)
> tensor([0, 0, 0, 0, 0, 0, 0, 0])
Taking the argmax across class scores is what you would do for a classification problem, but that isn't what you're doing here.
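For an actual classifier, the output would have one column per class and argmax(1) would pick the highest-scoring class for each row. A quick illustration (the logits here are random, so the predicted classes are just examples):

logits = torch.randn(4, 10)   # (bs, num_classes) output of a 10-class classifier
predicted = logits.argmax(1)  # shape (4,), e.g. tensor([3, 7, 0, 9])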
After that, the comparison correct_ones = (predicted_digits == target) also doesn't make sense. Your model is a regression model producing a floating-point output, and it's highly unlikely to ever match the target exactly (e.g. 4.000000001 != 4).
Based on this, I would expect the _correct function to return 0.0 for every batch, which is exactly the 0% accuracy you're seeing.
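If you still want an accuracy-style number for this regression task, one option is to count a prediction as correct when it lands within some tolerance of the target. A minimal sketch (the 1% tolerance is an arbitrary choice, not something from your code):

def _correct(output, target, rel_tol=0.01):
    # Treat a prediction as "correct" if it is within rel_tol (here 1%)
    # of the target value.
    close = torch.isclose(output, target, rtol=rel_tol)
    return close.type(torch.float).sum().item()

Since output and target both have shape (bs, 1), no argmax is needed; this just counts how many predictions in the batch fall inside the tolerance.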