I am trying to apply L1 regularization on a logistic model
class LogisticRegression(nn.Module):
def __init__(self):
super().__init__()
self.linear = nn.Linear(input_size, num_classes)
def forward(self, x):
x = x.reshape(-1, 784)
output = self.linear(x)
return output
def training_step(self, batch):
images, labels = batch
output = self(images)
loss = F.cross_entropy(output, labels)
acc = accuracy(output, labels)
return {'Training_loss': loss, 'Training_acc': acc}
def training_epoch_end(self, outputs):
batch_losses = [x['Training_loss'] for x in outputs]
epoch_loss = torch.stack(batch_losses).mean()
batch_accs = [x['Training_acc'] for x in outputs]
epoch_acc = torch.stack(batch_accs).mean()
return {'Training_loss': epoch_loss.item(), 'Training_acc': epoch_acc.item()}
def epoch_end(self, epoch, result):
print("Epoch [{}], Training_loss: {:.4f}, Training_acc: {:.4f}".format(epoch, result['Training_loss'], result['Training_acc']))
model = LogisticRegression()
But I think I am doing it wrong the accuracy did not change.
L1=0.2
def evaluate(model_b, trainloader):
outputs = [model_b.training_step(batch) for batch in trainloader]
return model_b.training_epoch_end(outputs)
def fit(epochs, lr, model_b, trainloader, opt_func=torch.optim.SGD):
history = []
optimizer = opt_func(model_b.parameters(), lr)
for epoch in range(epochs):
##### Training Phase
for batch in trainloader:
loss = model_b.training_step(batch)['Training_loss']
loss_Lasso = loss + 0.5 * L1 # L1 reg
loss_Lasso.backward()
optimizer.step()
optimizer.zero_grad()
result = evaluate_b(model_b, trainloader)
model_b.epoch_end(epoch, result)
history.append(result)
return history
Can anyone help me with what I am missing and how I can really apply L1 regularization? Also, is L1 regularization called lasso?
I believe the l1-norm is a type of Lasso regularization, yes, but there are others.
In your snippet L1
is set as a constant, instead you should measure the l1-norm of your model's parameters. Then sum it with your network's loss, as you did. In your example there is a single layer, so you will only need self.linear
's parameters. First gather all parameters then measure the total norm with torch.norm
. You could also use nn.L1Loss
.
params = torch.cat([x.view(-1) for x in model.linear.parameters()])
L1 = lamb*torch.norm(params, p=1)
Where lamb
is your lambda regularization parameter and model
is initialized from the LogisticRegression
class.