I am trying to train a network where the learning rate for each layer scales with 1/(layer width). Is there a way to do this in PyTorch? I tried changing the learning rate in the optimizer and updating it in my training loop, but that didn't work. I've seen some people discuss this with Adam, but I am using SGD to train. Here are the chunks where I define my model and training, if that's any help.
class ConvNet2(nn.Module):
    def __init__(self):
        super(ConvNet2, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 8, 3),
            nn.ReLU(),
            nn.Conv2d(8, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(800, 10)
        )

    def forward(self, x):
        return self.network(x)

net2 = ConvNet2().to(device)
def train(network, number_of_epochs):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(network.parameters(), lr=learning_rate)
    for epoch in range(number_of_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            # get the inputs
            inputs = inputs.to(device)
            labels = labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = network(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
The torch.optim documentation describes "per-parameter options": instead of a single iterable of parameters, you pass the optimizer a list of dicts, where each dict defines a parameter group with its own options (such as lr). Parameters in a group without an explicit option fall back to the default passed to the constructor. Assuming you only want to set the learning rate for the Conv2d layers (this is easily customizable in the code below), you could do something like this:
import torch
from torch import nn
from torch import optim

class ConvNet2(nn.Module):
    def __init__(self):
        super(ConvNet2, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 8, 3),
            nn.ReLU(),
            nn.Conv2d(8, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(800, 10)
        )

    def forward(self, x):
        return self.network(x)

net2 = ConvNet2()

def getParameters(model):
    getWidthConv2D = lambda layer: layer.out_channels
    parameters = []
    for layer in model.children():
        paramdict = {'params': layer.parameters()}
        if isinstance(layer, nn.Conv2d):
            # lr scales as 1/width (width = out_channels); 0.1 is the base rate
            paramdict['lr'] = 0.1 / getWidthConv2D(layer)
        parameters.append(paramdict)
    return parameters

optimizer = optim.SGD(getParameters(net2.network), lr=0.05)
print(optimizer)
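If you want the 1/width scaling applied to the Linear layer as well, the same pattern extends directly. Here is a minimal sketch, assuming "width" means out_channels for Conv2d and out_features for Linear (both are real attributes of those modules); the helper name getParametersScaled and the base rate of 0.1 are my own choices, not part of your code:

def getParametersScaled(model, base_lr=0.1):
    parameters = []
    for layer in model.children():
        paramdict = {'params': layer.parameters()}
        if isinstance(layer, nn.Conv2d):
            paramdict['lr'] = base_lr / layer.out_channels   # lr proportional to 1/width
        elif isinstance(layer, nn.Linear):
            paramdict['lr'] = base_lr / layer.out_features   # lr proportional to 1/width
        parameters.append(paramdict)
    return parameters

optimizer = optim.SGD(getParametersScaled(net2.network), lr=0.05)

Layers that get no explicit 'lr' key (ReLU, MaxPool2d, and Flatten have no parameters anyway) fall back to the default lr=0.05 passed to the constructor. You can also verify or change the rates later by iterating over optimizer.param_groups, which is how you would update them inside a training loop.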