deep-learning, neural-network, pytorch

Changing Learning Rate According to Layer Width in PyTorch


I am trying to train a network where the learning rate for each layer scales with 1/(layer width). Is there a way to do this in PyTorch? I tried changing the learning rate in the optimizer and updating it in my training loop, but that didn't work. I've seen some people discuss this for Adam, but I am using SGD to train. Here are the chunks where I define my model and training, if that's any help.

class ConvNet2(nn.Module):
    def __init__(self):
        super(ConvNet2, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 8, 3),
            nn.ReLU(),
            nn.Conv2d(8, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Flatten(),

            nn.Linear(800, 10)
        )

    def forward(self, x):
        return self.network(x)

net2 = ConvNet2().to(device)


def train(network, number_of_epochs):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(network.parameters(), lr=learning_rate)
    for epoch in range(number_of_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            # get the inputs and move them to the training device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize (single forward pass)
            outputs = network(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

Solution

  • In the documentation you can see that you can specify "per-parameter options": instead of a single iterable of parameters, optim.SGD accepts a list of dicts, each defining its own parameter group with its own lr. Assuming you only want to specify the learning rate for the Conv2d layers (this is easily customizable in the code below), you could do something like this:

    import torch
    from torch import nn
    from torch import optim
    from pprint import pprint
    
    class ConvNet2(nn.Module):
        def __init__(self):
            super(ConvNet2, self).__init__()
            self.network = nn.Sequential(
                nn.Conv2d(3, 8, 3),
                nn.ReLU(),
                nn.Conv2d(8, 32, 3),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),

                nn.Conv2d(32, 32, 3),
                nn.ReLU(),
                nn.Conv2d(32, 32, 3),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),

                nn.Flatten(),

                nn.Linear(800, 10)
            )

        def forward(self, x):
            return self.network(x)
    
    net2 = ConvNet2()
    
    def getParameters(model):
        getWidthConv2D = lambda layer: layer.out_channels
        parameters = []
        for layer in model.children():
            paramdict = {'params': layer.parameters()}
            if isinstance(layer, nn.Conv2d):
                # lr scales with 1/(layer width), as asked for in the question
                paramdict['lr'] = 0.1 / getWidthConv2D(layer)
            parameters.append(paramdict)
        return parameters
    
    optimizer = optim.SGD(getParameters(net2.network), lr=0.05)
    print(optimizer)
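
  • The groups created this way stay live on the optimizer, so you can also inspect or rescale each group's lr later, e.g. from a schedule inside the training loop. A minimal sketch, assuming the optimizer built above; the 0.5 decay factor is just a placeholder:

    # Inspect the per-group learning rates SGD will actually use.
    for i, group in enumerate(optimizer.param_groups):
        print(i, group['lr'])

    # Assigning to group['lr'] takes effect on the next optimizer.step(),
    # so a schedule can rescale the groups mid-training.
    for group in optimizer.param_groups:
        group['lr'] *= 0.5  # placeholder decay factor, applied to every group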