python, machine-learning, deep-learning, pytorch, generative-adversarial-network

Simple GAN RuntimeError: Given groups=1, weight of size [512, 3000, 5], expected input[1, 60, 3000] to have 3000 channels, but got 60 channels instead


I'm new to GANs and I'm having a hard time matching the GAN architecture to my training data's dimensions. My training data has shape (60 x 3000), i.e. 60 training samples of length 3000 each, and my goal is to artificially generate a sample of size 1 x 3000. The architecture of my GAN is:

Generator(
  (map1): Conv1d(100, 512, kernel_size=(5,), stride=(1,))
  (map2): Conv1d(512, 256, kernel_size=(5,), stride=(1,))
  (map3): Conv1d(256, 3000, kernel_size=(5,), stride=(1,))
  (leakyRelu): LeakyReLU(negative_slope=0.1)
)
Discriminator(
  (map1): Conv1d(3000, 512, kernel_size=(5,), stride=(1,))
  (map2): Conv1d(512, 256, kernel_size=(5,), stride=(1,))
  (map3): Conv1d(256, 1, kernel_size=(5,), stride=(1,))
  (leakyRelu): LeakyReLU(negative_slope=0.1)
)

If I print my training data, it looks like this:

array([[2.14236454, 2.10500993, 2.06635705, ..., 7.57922477, 7.56801547,
        7.55263677],
       ...,
       [1.07467659, 1.07582106, 1.07628207, ..., 1.49663065, 1.43491185,
        1.37456978]])

When I run my GAN code, I get the error below, which is very confusing because my input data is [60 x 3000], not [1, 60, 3000]. Could you please guide me on how to resolve this error? I would also love a deeper theoretical understanding of why it arises and how to fix it. Thank you very much.

RuntimeError: Given groups=1, weight of size [512, 3000, 5], expected input[1, 60, 3000] to have 3000 channels, but got 60 channels instead
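
For reference, the error can be reproduced in isolation; this minimal sketch uses the exact shapes quoted in the message above:

import torch
import torch.nn as nn

# nn.Conv1d reads its input as [batch_size, channels, sequence_length],
# so a [1, 60, 3000] tensor is interpreted as 60 channels of length 3000.
conv = nn.Conv1d(in_channels=3000, out_channels=512, kernel_size=5)  # weight shape [512, 3000, 5]
x = torch.randn(1, 60, 3000)
conv(x)  # RuntimeError: ... expected input[1, 60, 3000] to have 3000 channels, but got 60 channels instead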

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

class Generator(nn.Module):
    def __init__(self, input_size, output_size):
        super(Generator, self).__init__()
        self.map1 = nn.Conv1d(input_size, 512, 5)
        self.map2 = nn.Conv1d(512, 256, 5)
        self.map3 = nn.Conv1d(256, output_size, 5)
        self.leakyRelu = nn.LeakyReLU(0.1)
        self.tanh = torch.tanh

    def forward(self, x):
        x = self.leakyRelu(self.map1(x))
        x = self.leakyRelu(self.map2(x))
        x = self.leakyRelu(self.map3(x))
        return self.tanh(x)

class Discriminator(nn.Module):
    def __init__(self, input_size, output_size):
        super(Discriminator, self).__init__()
        self.map1 = nn.Conv1d(input_size, 512, 5)
        self.map2 = nn.Conv1d(512, 256, 5)
        self.map3 = nn.Conv1d(256, output_size, 5)        
        self.leakyRelu = nn.LeakyReLU(0.1)
        self.sigmoid = torch.sigmoid

    def forward(self, x):
        x = x.float()
        x = self.leakyRelu(self.map1(x))
        x = self.leakyRelu(self.map2(x))
        x = self.leakyRelu(self.map3(x))
        return self.sigmoid(x)

def train():
    # Model parameters
    g_input_size = 100      # Latent (noise) size; a design decision. Should match the dimension of the random noise fed into the generator
    g_output_size = 3000    # Size of generated output vector (should match input/desired data size)

    d_input_size = 3000    # Size of each input vector (should match the length of a data sample)
    d_output_size = 1     # Always 1. Single dimension for 'real' vs. 'fake' classification 

    d_sampler = get_distribution_sampler(0, 1) # real data placeholder 
    real_data = torch.tensor(interval_data)    # real data with dimensions (60 x 3000)
    gi_sampler = get_generator_input_sampler() # random noise with dimensions (g_input_size, g_output_size) => should match generator input size (latent size)

    G = Generator(input_size=g_input_size, output_size=g_output_size)
    D = Discriminator(input_size=d_input_size, output_size=d_output_size)

    d_learning_rate = 1e-3
    g_learning_rate = 1e-3
    sgd_momentum = 0.9
    num_epochs = 500
    print_interval = 100
    d_steps = 20
    g_steps = 20
    dfe, dre, ge = [], [], []
    d_real_data, d_fake_data, g_fake_data = None, None, None
    criterion = nn.BCELoss() 
    d_optimizer = optim.SGD(D.parameters(), lr=d_learning_rate, momentum=sgd_momentum)
    g_optimizer = optim.SGD(G.parameters(), lr=g_learning_rate, momentum=sgd_momentum)

    for epoch in range(num_epochs):
        ### train the Discriminator ###
        for d_index in range(d_steps): 
            D.zero_grad() 
            d_real_data = real_data # size (60x3000)
            d_real_data.requires_grad=True
            d_real_decision = D(d_real_data)
            d_real_error = criterion(d_real_decision, Variable(torch.ones([1])))
            d_real_error.backward()

            d_noise = Variable(gi_sampler(g_input_size, g_output_size))
            d_fake_data = G(d_noise).detach() 
            d_fake_decision = D(preprocess(d_fake_data.t()))
            d_fake_error = criterion(d_fake_decision, Variable(torch.zeros([1, 1])))
            d_fake_error.backward()  # accumulate gradients from the fake batch before stepping
            d_optimizer.step()

            dre.append(extract(d_real_error)[0])
            dfe.append(extract(d_fake_error)[0])

        ### train the Generator ###
        for g_index in range(g_steps):
            G.zero_grad() 
            noise = Variable(gi_sampler(g_input_size, g_output_size))
            g_fake_data = G(noise)
            dg_fake_decision = D(preprocess(g_fake_data.t())) 
            g_error = criterion(dg_fake_decision, Variable(torch.ones([1, 1])))  
            g_error.backward() 
            g_optimizer.step()  
            ge.append(extract(g_error)[0])

        if epoch % print_interval == 0:
            print("Epoch %s: D (%s real_err, %s fake_err) G (%s err); Real Dist (%s),  Fake Dist (%s) " %
                  (epoch, dre, dfe, ge, stats(extract(d_real_data)), stats(extract(d_fake_data))))
    
    return dfe, dre, ge, d_real_data, d_fake_data, g_fake_data

disc_fake_error, disc_real_error, gen_error, disc_real_data, disc_fake_data, gen_fake_data = train()

Solution

  • According to PyTorch's documentation for nn.Conv1d, the input has to be shaped [batch_size, channels, sequence_length]. Also note that because the data here is EEG voltage values, the channel count should be 1 (for comparison, an RGB image would have 3 channels). So for 60 samples of 1 x 3000 EEG voltage values, the input should be reshaped to [60, 1, 3000], as shown in the sketch below.
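
A minimal sketch of that fix, reusing the Discriminator class from the question (the dtype cast and the input_size=1 argument are assumptions that follow from the bullet above, not code from the original post):

import torch

# interval_data is the (60, 3000) NumPy array shown in the question.
# unsqueeze(1) inserts the channel axis: [60, 3000] -> [60, 1, 3000].
real_data = torch.tensor(interval_data, dtype=torch.float32).unsqueeze(1)
print(real_data.shape)  # torch.Size([60, 1, 3000])

# The discriminator's first conv layer must then expect 1 input channel, not 3000:
D = Discriminator(input_size=1, output_size=1)
decision = D(real_data)  # no channel mismatch; output shape is [60, 1, 2988]

Note that with three stride-1, kernel-5 convolutions and no padding, this discriminator still emits a per-position map of length 3000 - 3*4 = 2988 rather than a single real/fake probability per sample, so the BCELoss targets (and, symmetrically, the generator's final out_channels, currently 3000) would have to be adjusted to match.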