I wrote a neural network that should work with the california_housing dataset, but it doesn't. I've checked everything in the code and still can't find the problem. I'm stumped. Could someone help me with it? Code:
import torch.nn.functional as F
import torch.nn as nn
import torch
import pandas as pd
import sklearn.datasets
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
data = sklearn.datasets.fetch_california_housing(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.25, random_state=13)
class MyDataset(torch.utils.data.Dataset):
def __init__(self, x, y):
self.features = x
self.target = y
self.fv = self.features.values
self.tv = self.target.values
self.f_tensor = torch.tensor(self.fv, dtype=torch.float32)
self.t_tensor = torch.tensor(self.tv, dtype=torch.float32).reshape(-1, 1)
def __len__(self):
return len(self.features)
def __getitem__(self, idx):
return self.f_tensor[idx], self.t_tensor[idx]
train_dataset = MyDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=128,
shuffle=True, drop_last=True)
test_dataset = MyDataset(X_test, y_test)
test_loader = torch.utils.data.DataLoader(test_dataset,
batch_size=16,
shuffle=False, drop_last=True)
class FeedForward(nn.Module):
def __init__(self, input_dim, hidden_dims, output_dim=1):
super().__init__()
self.fc1 = nn.Linear(input_dim, hidden_dims[0])
self.bn1 = nn.BatchNorm1d(hidden_dims[0])
self.dropout1 = nn.Dropout(0.2)
self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
self.bn2 = nn.BatchNorm1d(hidden_dims[1])
self.dropout2 = nn.Dropout(0.2)
self.fc3 = nn.Linear(hidden_dims[1], hidden_dims[2])
self.bn3 = nn.BatchNorm1d(hidden_dims[2])
self.dropout3 = nn.Dropout(0.2)
self.fc4 = nn.Linear(hidden_dims[2], hidden_dims[3])
self.bn4 = nn.BatchNorm1d(hidden_dims[3])
self.dropout4 = nn.Dropout(0.2)
self.fc5 = nn.Linear(hidden_dims[3], output_dim)
def forward(self, x):
x = self.fc1(x)
x = self.bn1(x)
x = F.relu(x)
x = self.dropout1(x)
x = self.fc2(x)
x = self.bn2(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc3(x)
x = self.bn3(x)
x = F.relu(x)
x = self.dropout3(x)
x = self.fc4(x)
x = self.bn4(x)
x = F.relu(x)
x = self.dropout4(x)
x = self.fc5(x)
return x
def train(model, optimizer, criterion, train_loader, device):
model.train()
running_loss = 0.0
correct = 0
total = 0
for inputs, labels in train_loader:
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
_, predicted = outputs.max(1)
total += labels.size(0)
correct += predicted.eq(labels).sum().item()
train_loss = running_loss / len(train_loader)
train_acc = correct / total
return train_loss, train_acc
def test(model, criterion, test_loader, device):
model.eval()
running_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
for inputs, labels in test_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
running_loss += loss.item()
_, predicted = outputs.max(1)
total += labels.size(0)
correct += predicted.eq(labels).sum().item()
test_loss = running_loss / len(test_loader)
test_acc = correct / total
return test_loss, test_acc
def train_model(model, optimizer, criterion, train_loader, test_loader, device, num_epochs=10):
train_losses = []
train_accs = []
test_losses = []
test_accs = []
for epoch in range(num_epochs):
train_loss, train_acc = train(model, optimizer, criterion, train_loader, device)
test_loss, test_acc = test(model, criterion, test_loader, device)
train_losses.append(train_loss)
train_accs.append(train_acc)
test_losses.append(test_loss)
test_accs.append(test_acc)
print(f"Epoch [{epoch+1}/{num_epochs}]: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")
return train_losses, train_accs, test_losses, test_accs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
hidden_dims = [100, 64, 32, 16]
model = FeedForward(8, hidden_dims)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_losses, train_accs, test_losses, test_accs = train_model(model, optimizer, criterion,
train_loader, test_loader, device)
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label="Train Loss")
plt.plot(test_losses, label="Test Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(train_accs, label="Train Accuracy")
plt.plot(test_accs, label="Test Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()
I've tried changing the number of layers, the optimizer and its parameters, the number of epochs, the learning rate, and more...
Your train loss was going down, so you're on the right track. The issue is mainly with the way the validation metrics were computed.
The code was originally computing _, predicted = outputs.max(1), which takes the argmax over a single output column (so it is always index 0), and then predicted.eq(labels), which checks whether the values are exactly equal - but this is a regression problem, so predictions and targets will essentially never be exactly equal (they'd have to match down to the last decimal place). In short, the code was applying a classification accuracy metric to a regression problem.
Since this is a regression problem, the goal is to make the predictions as close as possible to the targets, and the error can be measured with the mean absolute error (MAE), which tells you how far off the predictions are on average.
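To see the difference concretely, here is a minimal sketch with made-up predictions and targets (hypothetical numbers, not outputs of the model above):

import torch

# Hypothetical predictions and targets for a batch of four samples.
preds = torch.tensor([[2.1], [0.9], [3.4], [1.7]])
targets = torch.tensor([[2.0], [1.0], [3.0], [2.0]])

# Exact equality essentially never holds for floats: "accuracy" is 0 here.
print(preds.eq(targets).float().mean().item())  # 0.0

# MAE: the average absolute difference between predictions and targets.
print((preds - targets).abs().mean().item())    # ≈ 0.225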
I've modified the code and it now runs as expected. I also added feature scaling: scaling the inputs is usually necessary for neural networks, as it stabilises training and improves convergence.
import torch.nn.functional as F
import torch.nn as nn
import torch
import pandas as pd
import sklearn.datasets
import matplotlib.pyplot as plt
import math
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
data = sklearn.datasets.fetch_california_housing(as_frame=True)
X_train_orig, X_test_orig, y_train, y_test = train_test_split(data.data, data.target, test_size=0.25, random_state=13)
# Fit the scaler on the training data only, then scale both splits.
# set_output(transform='pandas') makes transform return DataFrames (needs scikit-learn >= 1.2).
scaler = StandardScaler().fit(X_train_orig).set_output(transform='pandas')
X_train = scaler.transform(X_train_orig)
X_test = scaler.transform(X_test_orig)
class MyDataset(torch.utils.data.Dataset):
def __init__(self, x, y):
self.features = x
self.target = y
self.fv = self.features.values
self.tv = self.target.values
self.f_tensor = torch.tensor(self.fv, dtype=torch.float32)
self.t_tensor = torch.tensor(self.tv, dtype=torch.float32).reshape(-1, 1)
def __len__(self):
return len(self.features)
def __getitem__(self, idx):
return self.f_tensor[idx], self.t_tensor[idx]
train_dataset = MyDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=128,
shuffle=True, drop_last=True)
test_dataset = MyDataset(X_test, y_test)
test_loader = torch.utils.data.DataLoader(test_dataset,
batch_size=16,
shuffle=False, drop_last=True)
class FeedForward(nn.Module):
def __init__(self, input_dim, hidden_dims, output_dim=1):
super().__init__()
self.fc1 = nn.Linear(input_dim, hidden_dims[0])
self.bn1 = nn.BatchNorm1d(hidden_dims[0])
self.dropout1 = nn.Dropout(0.2)
self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
self.bn2 = nn.BatchNorm1d(hidden_dims[1])
self.dropout2 = nn.Dropout(0.2)
self.fc3 = nn.Linear(hidden_dims[1], hidden_dims[2])
self.bn3 = nn.BatchNorm1d(hidden_dims[2])
self.dropout3 = nn.Dropout(0.2)
self.fc4 = nn.Linear(hidden_dims[2], hidden_dims[3])
self.bn4 = nn.BatchNorm1d(hidden_dims[3])
self.dropout4 = nn.Dropout(0.2)
self.fc5 = nn.Linear(hidden_dims[3], output_dim)
def forward(self, x):
x = self.fc1(x)
x = self.bn1(x)
x = F.relu(x)
x = self.dropout1(x)
x = self.fc2(x)
x = self.bn2(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc3(x)
x = self.bn3(x)
x = F.relu(x)
x = self.dropout3(x)
x = self.fc4(x)
x = self.bn4(x)
x = F.relu(x)
x = self.dropout4(x)
x = self.fc5(x)
return x
def train(model, optimizer, criterion, train_loader, device):
model.train()
running_loss = 0.0
running_abs_error = 0
for inputs, labels in train_loader:
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
predicted = model(inputs)
loss = criterion(predicted, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
running_abs_error += abs(predicted - labels).sum().item()
train_loss = running_loss / len(train_loader)
    train_mae = running_abs_error / (len(train_loader) * train_loader.batch_size)  # drop_last=True: count only processed samples
return train_loss, train_mae
def test(model, criterion, test_loader, device):
model.eval()
running_loss = 0.0
running_abs_error = 0
with torch.no_grad():
for inputs, labels in test_loader:
inputs, labels = inputs.to(device), labels.to(device)
predicted = model(inputs)
loss = criterion(predicted, labels)
running_loss += loss.item()
running_abs_error += abs(predicted - labels).sum().item()
test_loss = running_loss / len(test_loader)
    test_mae = running_abs_error / (len(test_loader) * test_loader.batch_size)  # drop_last=True: count only processed samples
return test_loss, test_mae
def train_model(model, optimizer, criterion, train_loader, test_loader, device, num_epochs=10):
train_losses = []
train_mae_scores = []
test_losses = []
test_mae_scores = []
for epoch in range(num_epochs):
train_loss, train_mae = train(model, optimizer, criterion, train_loader, device)
test_loss, test_mae = test(model, criterion, test_loader, device)
train_losses.append(train_loss)
train_mae_scores.append(train_mae)
test_losses.append(test_loss)
test_mae_scores.append(test_mae)
print(f"Epoch [{epoch+1}/{num_epochs}]: Train Loss: {train_loss:.4f}, Train MAE: {train_mae:.4f}, "
f"Test Loss: {test_loss:.4f}, Test MAE: {test_mae:.4f}")
return train_losses, train_mae_scores, test_losses, test_mae_scores
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
hidden_dims = [100, 64, 32, 16]
model = FeedForward(8, hidden_dims)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_losses, train_mae_scores, test_losses, test_mae_scores = train_model(model, optimizer, criterion,
train_loader, test_loader, device)
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label="Train Loss")
plt.plot(test_losses, label="Test Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(train_mae_scores, label="Train MAE")
plt.plot(test_mae_scores, label="Test MAE")
plt.xlabel("Epochs")
plt.ylabel("mean absolute error")
plt.legend()
plt.show()
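If you want an independent check of the final numbers, you can recompute the test MAE with scikit-learn over the whole test split in one pass (a sketch, assuming the objects defined above; the result can differ slightly from the loader-based MAE because drop_last=True drops the last partial test batch):

from sklearn.metrics import mean_absolute_error

model.eval()
with torch.no_grad():
    # In eval mode BatchNorm uses running statistics and Dropout is disabled,
    # so a single full-batch forward pass is fine here.
    preds = model(test_dataset.f_tensor.to(device)).cpu().numpy()
print(mean_absolute_error(test_dataset.t_tensor.numpy(), preds))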