Tags: deep-learning, neural-network

Neural Network doesn't learn (Regression)


I wrote a neural network that should learn the california_housing dataset, but it doesn't. I've checked everything in the code and still can't find the problem. I'm stumped. Maybe someone could help me with it? Code:

import torch.nn.functional as F
import torch.nn as nn
import torch
import pandas as pd
import sklearn.datasets
import matplotlib.pyplot as plt
import math

from sklearn.model_selection import train_test_split


data = sklearn.datasets.fetch_california_housing(as_frame=True)


X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.25, random_state=13)


class MyDataset(torch.utils.data.Dataset):
    def __init__(self, x, y):
        self.features = x
        self.target = y
        self.fv = self.features.values
        self.tv = self.target.values

        self.f_tensor = torch.tensor(self.fv, dtype=torch.float32)
        self.t_tensor = torch.tensor(self.tv, dtype=torch.float32).reshape(-1, 1)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.f_tensor[idx], self.t_tensor[idx]


train_dataset = MyDataset(X_train, y_train)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=128,
                                           shuffle=True, drop_last=True)

test_dataset = MyDataset(X_test, y_test)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=16,
                                          shuffle=False, drop_last=True)

class FeedForward(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim=1):
        super().__init__()

        self.fc1 = nn.Linear(input_dim, hidden_dims[0])
        self.bn1 = nn.BatchNorm1d(hidden_dims[0])
        self.dropout1 = nn.Dropout(0.2)
        
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.bn2 = nn.BatchNorm1d(hidden_dims[1])
        self.dropout2 = nn.Dropout(0.2)
        
        self.fc3 = nn.Linear(hidden_dims[1], hidden_dims[2])
        self.bn3 = nn.BatchNorm1d(hidden_dims[2])
        self.dropout3 = nn.Dropout(0.2)
        
        self.fc4 = nn.Linear(hidden_dims[2], hidden_dims[3])
        self.bn4 = nn.BatchNorm1d(hidden_dims[3])
        self.dropout4 = nn.Dropout(0.2)
        
        self.fc5 = nn.Linear(hidden_dims[3], output_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.dropout2(x)

        x = self.fc3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.dropout3(x)

        x = self.fc4(x)
        x = self.bn4(x)
        x = F.relu(x)
        x = self.dropout4(x)

        x = self.fc5(x)

        return x

def train(model, optimizer, criterion, train_loader, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = correct / total

    return train_loss, train_acc


def test(model, criterion, test_loader, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    test_loss = running_loss / len(test_loader)
    test_acc = correct / total

    return test_loss, test_acc


def train_model(model, optimizer, criterion, train_loader, test_loader, device, num_epochs=10):
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []

    for epoch in range(num_epochs):
        train_loss, train_acc = train(model, optimizer, criterion, train_loader, device)
        test_loss, test_acc = test(model, criterion, test_loader, device)

        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

        print(f"Epoch [{epoch+1}/{num_epochs}]: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
              f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")

    return train_losses, train_accs, test_losses, test_accs


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

hidden_dims = [100, 64, 32, 16]  

model = FeedForward(8, hidden_dims)
model = model.to(device)

criterion = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train_losses, train_accs, test_losses, test_accs = train_model(model, optimizer, criterion,
                                                               train_loader, test_loader, device)

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label="Train Loss")
plt.plot(test_losses, label="Test Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_accs, label="Train Accuracy")
plt.plot(test_accs, label="Test Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

plt.show()

I've tried changing the number of layers, the optimizer and its parameters, the number of epochs, the learning rate, and much more...


Solution

  • Your train loss was going down, so you're on the right track. The issue was mainly with the way the validation metrics were computed.

    The code was originally using predicted.eq(labels), which checks whether the values are exactly equal. Since this is a regression problem, the predictions will essentially never equal the targets exactly (they would have to match down to the last decimal place). Worse, outputs.max(1) returns the index of the largest output, which is always 0 for a single-output model. In short, the code was applying a classification accuracy metric to a regression problem.
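
    To see the problem concretely, here is a small standalone sketch with made-up values (not taken from the actual model) of what the original metric computes:

    import torch

    # toy batch from a single-output regression model, shaped like the one above
    outputs = torch.tensor([[2.31], [1.07], [4.98]])  # predictions
    labels = torch.tensor([[2.3], [1.1], [5.0]])      # targets

    # outputs.max(1) returns (max values, argmax indices); with a single
    # output column the argmax is always 0, so it carries no information
    _, predicted = outputs.max(1)
    print(predicted)  # tensor([0, 0, 0])

    # exact equality against continuous targets is essentially never True,
    # so this "accuracy" stays at zero no matter how good the model gets
    print(predicted.eq(labels.squeeze()))  # tensor([False, False, False])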

    Since this is a regression problem, the task is to make the predictions as close as possible to the targets. The error can be measured with the mean absolute error (MAE), which tells you how far off the predictions are on average.
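
    As a quick illustration with made-up numbers, MAE is just the mean of the absolute differences between predictions and targets:

    import torch

    predictions = torch.tensor([2.5, 0.0, 2.0, 8.0])
    targets = torch.tensor([3.0, -0.5, 2.0, 7.0])

    # MAE = mean(|prediction - target|)
    mae = (predictions - targets).abs().mean()
    print(mae)  # tensor(0.5000)

    # this is exactly what the training loop below accumulates batch by batch:
    # abs(predicted - labels).sum(), divided by the number of samples at the end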

    I've modified the code and it runs as expected. Scaling the data is usually required for neural nets: it helps stabilise training and improves convergence, so I've added feature scaling to the code below.
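
    For reference, StandardScaler standardises each feature to zero mean and unit variance, with the statistics computed from the training split only so that no test information leaks in. A toy illustration (illustrative values) of what the transform does:

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    x = np.array([[1.0], [2.0], [3.0], [4.0]])  # one toy feature column

    scaler = StandardScaler().fit(x)
    print(scaler.transform(x).ravel())  # approximately [-1.342 -0.447  0.447  1.342]

    # same as the z-score computed by hand: (x - mean) / std
    print(((x - x.mean()) / x.std()).ravel())  # identical values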

    [Image: training loss and MAE curves produced by the modified code]

    import torch.nn.functional as F
    import torch.nn as nn
    import torch
    import pandas as pd
    import sklearn.datasets
    import matplotlib.pyplot as plt
    import math
    
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    
    data = sklearn.datasets.fetch_california_housing(as_frame=True)
    
    X_train_orig, X_test_orig, y_train, y_test = train_test_split(data.data, data.target, test_size=0.25, random_state=13)
    
    # Fit the scaler on the training data only (to avoid test-set leakage), then scale both splits
    scaler = StandardScaler().fit(X_train_orig).set_output(transform='pandas')
    X_train = scaler.transform(X_train_orig)
    X_test = scaler.transform(X_test_orig)
    
    class MyDataset(torch.utils.data.Dataset):
        def __init__(self, x, y):
            self.features = x
            self.target = y
            self.fv = self.features.values
            self.tv = self.target.values
    
            self.f_tensor = torch.tensor(self.fv, dtype=torch.float32)
            self.t_tensor = torch.tensor(self.tv, dtype=torch.float32).reshape(-1, 1)
    
        def __len__(self):
            return len(self.features)
    
        def __getitem__(self, idx):
            return self.f_tensor[idx], self.t_tensor[idx]
    
    
    train_dataset = MyDataset(X_train, y_train)
    
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=128,
                                               shuffle=True, drop_last=True)
    
    test_dataset = MyDataset(X_test, y_test)
    
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=16,
                                              shuffle=False)  # no drop_last: evaluate on every test sample
    
    class FeedForward(nn.Module):
        def __init__(self, input_dim, hidden_dims, output_dim=1):
            super().__init__()
    
            self.fc1 = nn.Linear(input_dim, hidden_dims[0])
            self.bn1 = nn.BatchNorm1d(hidden_dims[0])
            self.dropout1 = nn.Dropout(0.2)
            
            self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
            self.bn2 = nn.BatchNorm1d(hidden_dims[1])
            self.dropout2 = nn.Dropout(0.2)
            
            self.fc3 = nn.Linear(hidden_dims[1], hidden_dims[2])
            self.bn3 = nn.BatchNorm1d(hidden_dims[2])
            self.dropout3 = nn.Dropout(0.2)
            
            self.fc4 = nn.Linear(hidden_dims[2], hidden_dims[3])
            self.bn4 = nn.BatchNorm1d(hidden_dims[3])
            self.dropout4 = nn.Dropout(0.2)
            
            self.fc5 = nn.Linear(hidden_dims[3], output_dim)
    
        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = F.relu(x)
            x = self.dropout1(x)
    
            x = self.fc2(x)
            x = self.bn2(x)
            x = F.relu(x)
            x = self.dropout2(x)
    
            x = self.fc3(x)
            x = self.bn3(x)
            x = F.relu(x)
            x = self.dropout3(x)
    
            x = self.fc4(x)
            x = self.bn4(x)
            x = F.relu(x)
            x = self.dropout4(x)
    
            x = self.fc5(x)
    
            return x
    
    def train(model, optimizer, criterion, train_loader, device):
        model.train()
        running_loss = 0.0
        running_abs_error = 0
    
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
    
            optimizer.zero_grad()
    
            predicted = model(inputs)
            loss = criterion(predicted, labels)
            loss.backward()
            optimizer.step()
    
            running_loss += loss.item()
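            # accumulate the summed absolute error; converted to MAE after the loop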
            running_abs_error += abs(predicted - labels).sum().item()
    
        train_loss = running_loss / len(train_loader)
        # drop_last=True skips the final partial batch, so normalise by the
        # number of samples actually seen rather than the full dataset size
        train_mae = running_abs_error / (len(train_loader) * train_loader.batch_size)
    
        return train_loss, train_mae
    
    
    def test(model, criterion, test_loader, device):
        model.eval()
        running_loss = 0.0
        running_abs_error = 0
    
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
    
                predicted = model(inputs)
                loss = criterion(predicted, labels)
    
                running_loss += loss.item()
                running_abs_error += abs(predicted - labels).sum().item()
    
        test_loss = running_loss / len(test_loader)
        test_mae = running_abs_error / len(test_loader.dataset)
    
        return test_loss, test_mae
    
    
    def train_model(model, optimizer, criterion, train_loader, test_loader, device, num_epochs=10):
        train_losses = []
        train_mae_scores = []
        test_losses = []
        test_mae_scores = []
    
        for epoch in range(num_epochs):
            train_loss, train_mae = train(model, optimizer, criterion, train_loader, device)
            test_loss, test_mae = test(model, criterion, test_loader, device)
    
            train_losses.append(train_loss)
            train_mae_scores.append(train_mae)
            test_losses.append(test_loss)
            test_mae_scores.append(test_mae)
    
            print(f"Epoch [{epoch+1}/{num_epochs}]: Train Loss: {train_loss:.4f}, Train MAE: {train_mae:.4f}, "
                  f"Test Loss: {test_loss:.4f}, Test MAE: {test_mae:.4f}")
    
        return train_losses, train_mae_scores, test_losses, test_mae_scores
    
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    hidden_dims = [100, 64, 32, 16]  
    
    model = FeedForward(8, hidden_dims)
    model = model.to(device)
    
    criterion = nn.MSELoss()
    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    train_losses, train_mae_scores, test_losses, test_mae_scores = train_model(model, optimizer, criterion,
                                                                               train_loader, test_loader, device)
    
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label="Train Loss")
    plt.plot(test_losses, label="Test Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(train_mae_scores, label="Train MAE")
    plt.plot(test_mae_scores, label="Test MAE")
    plt.xlabel("Epochs")
    plt.ylabel("mean absolute error")
    plt.legend()
    
    plt.show()
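
    For context on the numbers to expect: the target in fetch_california_housing is the median house value in units of $100,000, so a test MAE of, say, 0.4 means the predictions are off by roughly $40,000 on average.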