Tags: python, pytorch

conv2d() received an invalid combination of arguments


After the ResNet convolutions, I want to further compress the 256 channels down to 20. I added a layer at the end, but during forward propagation this layer raises an error, and I don't know why.

def forward(self, x):
    x = self.conv1(x)
    dif_residual1 = self.downsample1(x)
    x = self.layer1_1(x)
    x = x + dif_residual1
    residual = x
    x = self.layer1_2(x)
    x = x + residual
    residual = x
    x = self.layer1_3(x)
    x = x + residual

    if self.out_channel != 256:
        x = self.layer2

    filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
    x = F.conv2d(x, weight=filters, padding=0)

The shape of x before the if statement is:

x = {Tensor:(1,256,117,240)}

But after the if statement is executed, x is no longer a tensor; the debugger shows it has become an nn.Sequential module.

The error I get is this:

x = F.conv2d(feature, weight=filters, padding=0)
TypeError: conv2d() received an invalid combination of arguments - got (Sequential, weight=Tensor, padding=int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)

I then encountered a new problem:

File "D:\software\Anaconda\envs\torch1.10\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 1, 117, 240]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

My code:

class VGG(nn.Module):

    def __init__(self, in_channel, out_channel=None, init_weights=True, device='gpu', batch_size=1):
        super(VGG, self).__init__()

        self.batch_size = batch_size
        self.out_channel = out_channel
        if device == 'gpu':
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device("cpu")

        modes = 'reflect'
        out_channel1 = 64
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        out_channel2 = 128
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        out_channel3 = 256
        self.conv3_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )

        if out_channel is None:
            self.out_channel = 256
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1,
                          padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
        else:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU(),
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel),
                nn.LeakyReLU()
            )

        if init_weights:
            self._init_weight()

    def forward(self, x):

        x = self.conv1_1(x)
        x = self.conv1_2(x)

        x = self.conv2_1(x)
        x = self.conv2_2(x)

        x = self.conv3_1(x)
        x = self.conv3_2(x)

        x = self.conv3_3(x)

        feature = x

        filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
        x = F.conv2d(x, weight=filters, padding=0)

        return x, feature

out_channel = 20
model = VGG(in_channel=12, out_channel=out_channel, init_weights=True, batch_size=batch_size)

for epoch in range(start_epoch + 1, epochs):
    # train
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs, feature = model(images.to(device))
        outputs = tonser_nolmal(outputs)
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss)
    checkpoint = {
        "net": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch
    }
    torch.save(checkpoint, save_path + "/model-{}.pth".format(epoch))

    # validate
    model.eval()
    count_acc = 0.0
    count_mae = 0.0
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs, _ = model(val_images.to(device))
            # outputs = F.normalize(outputs,dim=3)
            outputs = tonser_nolmal(outputs)
            loss = loss_function(outputs, val_labels.to(device))
            count_acc = count_acc + loss.item()
            mae = Evaluation().MAE(outputs, val_labels.to(device))
            count_mae = count_mae + mae.item()

Solution

  • The error is likely caused by the following assignment, which binds the nn.Sequential module itself to x instead of calling it:

        if self.out_channel != 256:
            x = self.layer2
    

    which can be easily fixed by changing it to

            x = self.layer2(x)
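
    To see the difference in isolation, here is a minimal sketch (layer2 below is a stand-in nn.Sequential with made-up shapes, not the OP's exact module):

        import torch
        import torch.nn as nn

        layer2 = nn.Sequential(nn.Conv2d(256, 20, kernel_size=1))
        x = torch.randn(1, 256, 8, 8)   # dummy input

        y = layer2      # binds the module object itself -> y is an nn.Sequential
        y = layer2(x)   # calls the module -> y is a Tensor of shape (1, 20, 8, 8)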
    

    Update:

    Since the OP updated the code, I ran some tests and found several problems:

    1. self._init_weight was not provided, so I commented it out;
    2. filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device): the weight passed to F.conv2d should have shape (c_out, c_in, kernel_size, kernel_size), but here batch_size sits in the out_channels position (see the shape check after this list).
    3. The role of filters in the forward was not clear to me. If you want to reduce the out_channels further from 256 to 20, initializing your model with VGG(..., out_channel=20) is sufficient; self.conv3_3 already does the job.
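
    As a quick check of point 2, the F.conv2d weight convention with dummy tensors:

        import torch
        import torch.nn.functional as F

        x = torch.randn(1, 256, 4, 4)         # input: (N, C_in, H, W)
        w = torch.ones(20, 256, 1, 1)         # weight: (C_out, C_in, kH, kW)
        y = F.conv2d(x, weight=w, padding=0)
        print(y.shape)                        # torch.Size([1, 20, 4, 4])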

    On my end, I modified the code a little bit and it ran successfully:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import DataLoader
    
    dataset = [torch.randn(12, 64, 64) for _ in range(1000)]
    train_loader = DataLoader(dataset, batch_size=1, shuffle=True)
    
    
    class VGG(nn.Module):
    
        def __init__(self, in_channel, out_channel=None, init_weights=True, device='cpu', batch_size=1):
            super(VGG, self).__init__()
    
            self.batch_size = batch_size
            self.out_channel = out_channel
            self.device = device
    
            modes = 'reflect'
            out_channel1 = 64
            self.conv1_1 = nn.Sequential(
                nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
                nn.BatchNorm2d(out_channel1),
                nn.LeakyReLU()
            )
            self.conv1_2 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
                nn.BatchNorm2d(out_channel1),
                nn.LeakyReLU()
            )
            out_channel2 = 128
            self.conv2_1 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
                nn.BatchNorm2d(out_channel2),
                nn.LeakyReLU()
            )
            self.conv2_2 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
                nn.BatchNorm2d(out_channel2),
                nn.LeakyReLU()
            )
            self.out_channel3 = out_channel3 = 256
            self.conv3_1 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
            self.conv3_2 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
            
            self.out_channel = out_channel
            if out_channel is None:
                self.conv3_3 = nn.Sequential(
                    nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1,
                              padding_mode=modes, bias=False),
                    nn.BatchNorm2d(out_channel3),
                    nn.LeakyReLU()
                )
            else:
                self.conv3_3 = nn.Sequential(
                    nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                    nn.BatchNorm2d(out_channel3),
                    nn.LeakyReLU(),
                    nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                    nn.BatchNorm2d(out_channel),
                    nn.LeakyReLU()
                )
            # The implementation of _init_weight is not found
    #         if init_weights:
    #             self._init_weight()
    
    
        def forward(self, x):
    
            x = self.conv1_1(x)
            x = self.conv1_2(x)
    
            x = self.conv2_1(x)
            x = self.conv2_2(x)
    
            x = self.conv3_1(x)
            x = self.conv3_2(x)
    
            x = self.conv3_3(x)
    
            feature = x
            
            if x.shape[1] == 256:  # self.out_channel is None
                filters = torch.ones(20, self.out_channel3, 1, 1).to(self.device)
                x = F.conv2d(x, weight = filters, padding = 0)
    
            return x, feature
    
    
    out_channel = 20
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = VGG(in_channel=12, out_channel=None, init_weights=True, device=device, batch_size=1)
    model.to(device)
    print(model(next(iter(train_loader)).to(device))[0].shape)
    model = VGG(in_channel=12, out_channel=20, init_weights=True, device=device, batch_size=1)
    model.to(device)
    print(model(next(iter(train_loader)).to(device))[0].shape)
    

    Outputs:

    torch.Size([1, 20, 64, 64])
    torch.Size([1, 20, 64, 64])
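
    As a side note on the later RuntimeError: that message means a tensor saved for the backward pass was modified in place afterwards. A minimal, self-contained repro (unrelated to the model above) looks like this:

        import torch

        x = torch.randn(3, requires_grad=True)
        y = x + 1           # y is output 0 of AddBackward0, at version 0
        z = y * y           # MulBackward0 saves y for its backward
        y += 1              # in-place op bumps y to version 1
        z.sum().backward()  # RuntimeError: ... is at version 1; expected version 0

    The usual fix is to replace the in-place update with an out-of-place one (y = y + 1), and, as the hint suggests, torch.autograd.set_detect_anomaly(True) will point at the offending operation.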