I'm trying to use the pre-trained VGG-Face network in my script to recognize faces in my dataset of RGB images of size [256, 256], but I get “size mismatch, m1: [1 x 2622], m2: [4096 x 2]”. Resizing my images does not help, even though, as you can see, the same code works with ResNet and AlexNet.
I've tried resizing the images with the `interpolate` function, but the size mismatch persists.
def training(model_conv, learning_rate, wd, net):
    """Train the ``fc`` head of ``model_conv`` and save the whole model.

    Only the parameters of ``model_conv.fc`` are handed to the optimizer, so
    every other layer keeps its current weights. The function reads the
    module-level names ``train_loader`` and ``num_epochs``.

    Args:
        model_conv: Network exposing an ``fc`` sub-module to optimize.
        learning_rate: Learning rate passed to Adam.
        wd: Weight decay passed to Adam.
        net: Not used in this function; kept so existing callers still work.

    Returns:
        ``(images, labels)`` of the last processed batch, or ``(None, None)``
        when no batch was processed.
    """
    criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor([1, 1]))
    optimizer = torch.optim.Adam(
        model_conv.fc.parameters(), lr=learning_rate, weight_decay=wd
    )
    total_step = len(train_loader)
    loss_list = []
    # Pre-bind the return values so an empty loader (or num_epochs == 0)
    # does not end in an UnboundLocalError at the return statement.
    images = labels = None
    print("Inizio il training")
    for epoch in range(num_epochs):
        for i, (im, labels) in enumerate(train_loader):
            # Resize every batch to 224x224. align_corners=False is what
            # bilinear mode already does by default; spelling it out avoids
            # the warning PyTorch emits when it is left unspecified.
            images = torch.nn.functional.interpolate(
                im, size=224, mode='bilinear', align_corners=False
            )
            outputs = model_conv(images)
            loss = criterion(outputs, labels)
            loss_list.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
    # NOTE(review): torch.save on a whole module pickles it; a model that
    # stores lambda functions as attributes may fail to pickle. Consider
    # saving model_conv.state_dict() if that happens - confirm with callers
    # that load 'TrainedModel.pt' before changing the format.
    torch.save(model_conv, 'TrainedModel.pt')
    return images, labels
def main():
    """Load the pre-trained VGG-Face weights and attach a 2-class ``fc`` head.

    ``VGG_FACE.vgg_face`` is an ``nn.Sequential``. Assigning a module to an
    attribute of a ``Sequential`` registers it as one more layer that runs
    *after* all existing layers, so the new ``fc`` layer receives the
    2622-dimensional output of the original classifier, not the 4096-d
    features that feed it.
    """
    net = "vgg"
    # NOTE(review): learning_rate and wd are not used in this excerpt;
    # presumably they are passed to training() in code not shown here.
    learning_rate = 10e-6
    wd = 10e-4
    if net == "vgg":
        print("Hai selezionato VGG")
        model_conv = VGG_FACE.vgg_face
        data = torch.load("VGG_FACE.pth")
        # Load the checkpoint before adding the new head so its keys still
        # match the original layers.
        model_conv.load_state_dict(data)
        # The appended head must take 2622 inputs. Using 4096 here produced
        # "size mismatch, m1: [N x 2622], m2: [4096 x 2]". The former
        # `model_conv[-1] = model_conv.fc` line is dropped: once `fc` is
        # registered it *is* the last entry, so that line was a no-op.
        model_conv.fc = nn.Linear(2622, 2)


if __name__ == '__main__':
    main()
For example, this is another script in which I used the same VGG network correctly with some random images:
def test():
    """Classify ``cooper2.jpg`` with VGG-Face and print the top-5 identities.

    Loads the weights from ``VGG_FACE.pth`` and the class names from
    ``names.txt``, resizes the image to 224x224, subtracts a per-channel
    mean, runs a forward pass without gradients and prints the ``N`` best
    scores. Afterwards the last entry of the network is replaced by a fresh
    ``Linear(4096, numero_classi)`` layer.
    """
    N = 5
    net = VGG_FACE.vgg_face
    data = torch.load("VGG_FACE.pth")
    net.load_state_dict(data)
    net.eval()
    # Context manager so the names file is closed deterministically.
    with open("names.txt") as names_file:
        names = names_file.read().split()
    with torch.no_grad():
        mean = np.array([93.5940, 104.7624, 129.1863])
        # NOTE(review): scipy.misc.imread / imresize are deprecated and have
        # been removed from recent SciPy releases; this needs an older SciPy
        # or a port to another image library.
        images = scipy.misc.imread("cooper2.jpg", mode="RGB")
        images = scipy.misc.imresize(images, [224, 224])
        images = images.astype(np.float32)
        images -= mean[np.newaxis, np.newaxis, :]
        # HWC -> CHW, then add a leading batch axis.
        images = np.transpose(images, (2, 0, 1))
        images = images[np.newaxis, ...]
        images = torch.tensor(images, dtype=torch.float32)
        y = net(images)
        y = torch.nn.functional.softmax(y, 1)
        # topk returns (values, indices) for the single image in the batch.
        scores, indices = torch.topk(y[0, :], N)
        for position, (score, index) in enumerate(
                zip(scores.tolist(), indices.tolist()), start=1):
            print("{}) {} ({:.2f})".format(position, names[index], score))
        print()
    # Swap the final entry for a new, untrained classifier. Indexing with
    # [-1] replaces the layer in place instead of appending another one.
    numero_classi = 2
    net[-1] = torch.nn.Linear(4096, numero_classi)


if __name__ == "__main__":
    test()
The error I'm getting is:
File "/Users/danieleligato/PycharmProjects/parametral/VGGTEST.py", line 53, in training
outputs = model_conv(images)
RuntimeError: size mismatch, m1: [4 x 2622], m2: [4096 x 2] at /Users/soumith/code/builder/wheel/pytorch-src/aten/src/TH/generic/THTensorMath.cpp:2070
This is the VGG net that I'm using:
class LambdaBase(nn.Sequential):
    """Sequential container that also carries a plain Python callable.

    The callable is kept in ``lambda_func``; subclasses decide how it is
    applied to whatever ``forward_prepare`` returns.
    """

    def __init__(self, fn, *args):
        super(LambdaBase, self).__init__(*args)
        self.lambda_func = fn

    def forward_prepare(self, input):
        """Feed ``input`` to every child module and collect the results.

        Returns the list of child outputs, or ``input`` itself when the
        container has no children (or the list is otherwise empty).
        """
        results = [child(input) for child in self._modules.values()]
        if not results:
            return input
        return results
class Lambda(LambdaBase):
    """Apply the stored callable once to the prepared input."""

    def forward(self, input):
        prepared = self.forward_prepare(input)
        return self.lambda_func(prepared)
class LambdaMap(LambdaBase):
    """Apply the stored callable to each element of the prepared input."""

    def forward(self, input):
        # On Python 3 map() is a lazy, single-use iterator. Materialise it
        # so callers get a real list they can index or iterate repeatedly.
        return list(map(self.lambda_func, self.forward_prepare(input)))
class LambdaReduce(LambdaBase):
    """Fold the prepared input into one value with the stored callable."""

    def forward(self, input):
        # reduce is not a builtin on Python 3; importing it here keeps the
        # class working regardless of the file's top-level imports.
        from functools import reduce

        return reduce(self.lambda_func, self.forward_prepare(input))
# VGG-Face network as one flat nn.Sequential: five convolutional stages, each
# closed by a 2x2 max-pool, followed by a flatten step and three fully
# connected layers ending in 2622 outputs.
# The first Linear layer expects 25088 = 512 * 7 * 7 features, which is what
# five stride-2 poolings produce from a 224x224 input.
# The order and nesting of the entries determine the state_dict keys, so they
# must stay as they are for a checkpoint saved from this layout to load.
vgg_face = nn.Sequential( # Sequential,
# Stage 1: 3 -> 64 channels.
nn.Conv2d(3,64,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(64,64,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
# Stage 2: 64 -> 128 channels.
nn.Conv2d(64,128,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(128,128,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
# Stage 3: 128 -> 256 channels.
nn.Conv2d(128,256,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
# Stage 4: 256 -> 512 channels.
nn.Conv2d(256,512,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
# Stage 5: 512 -> 512 channels.
nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
nn.ReLU(),
nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
# Flatten everything except the batch dimension.
Lambda(lambda x: x.view(x.size(0),-1)), # View,
# Each Linear is wrapped in a Sequential whose Lambda turns a 1-D input into
# a single-row 2-D tensor and leaves other inputs unchanged.
nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(25088,4096)), # Linear,
nn.ReLU(),
nn.Dropout(0.5),
nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,4096)), # Linear,
nn.ReLU(),
nn.Dropout(0.5),
# Final classifier: 4096 features -> 2622 outputs. This whole wrapper is the
# last entry of the container, i.e. what vgg_face[-1] refers to.
nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,2622)), # Linear,
)
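The error comes from the line below. The model is an `nn.Sequential`, so assigning `model_conv.fc` does not replace the last layer; it registers a new layer that runs after the existing 2622-way output. Its input size must therefore be 2622, not 4096: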
model_conv.fc = nn.Linear(4096, 2)
Change it to:
model_conv.fc = nn.Linear(2622, 2)