I am trying to concatenate a BERT model with a 1-D CNN using PyTorch. I used the code below, but I do not understand the meaning of in_channels and out_channels in nn.Conv1d when the input to the CNN model has shape (256, 64, 768).
class MixModel(nn.Module):
    def __init__(self, pre_trained='distilbert-base-uncased'):
        super().__init__()
        self.bert = AutoModel.from_pretrained('distilbert-base-uncased')
        self.hidden_size = self.bert.config.hidden_size
        self.conv = nn.Conv1d(in_channels=1, out_channels=256, kernel_size=5, padding='valid', stride=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=256 - 5 + 1)
        self.dropout = nn.Dropout(0.3)
        self.clf = nn.Linear(self.hidden_size*2, 6)

    def forward(self, inputs, mask, labels):
        cls_hs = self.bert(input_ids=inputs, attention_mask=mask, return_dict=False)
        x = cls_hs
        # x = torch.cat(cls_hs[0])  # x = [416, 64, 768]
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.dropout(x)
        x = self.clf(x)
        return x
Edit: I followed the recommended answer and changed the parameters, but I got an error.
class MixModel(nn.Module):
    def __init__(self, pre_trained='bert-base-uncased'):
        super().__init__()
        self.bert = AutoModel.from_pretrained('distilbert-base-uncased')
        self.hidden_size = self.bert.config.hidden_size
        self.conv = nn.Conv1d(in_channels=768, out_channels=256, kernel_size=5, padding='valid', stride=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=64 - 5 + 1)
        print(11)
        self.dropout = nn.Dropout(0.3)
        print(12)
        self.clf = nn.Linear(self.hidden_size*2, 6)
        print(13)

    def forward(self, inputs, mask, labels):
        cls_hs = self.bert(input_ids=inputs, attention_mask=mask, return_dict=False)
        x = cls_hs[0]
        print(cls_hs[0])
        print(len(cls_hs[0]))
        print(cls_hs[0].size())
        # x = torch.cat(cls_hs, 0)  # x = [416, 64, 768]
        x = x.permute(0, 2, 1)
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.dropout(x)
        x = self.clf(x)
        return x
The error is:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1846     if has_torch_function_variadic(input, weight, bias):
   1847         return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848     return torch._C._nn.linear(input, weight, bias)
   1849
   1850

RuntimeError: mat1 and mat2 shapes cannot be multiplied (65536x1 and 1536x6)
The dimension of the output prediction of BERT (and many other transformer-based models) is of shape batch x seq-len x feature-dim: that is, your input is a batch of 256 sequences of length 64 tokens (probably with padding), where each token is represented by a feature vector of dimension 768.
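Note that nn.Conv1d expects its input as batch x channels x length, which you can check with a stand-in tensor (the shapes below are taken from the question; the random tensor just stands in for the BERT output):

import torch
import torch.nn as nn

x = torch.randn(256, 64, 768)  # stand-in for the BERT output: batch x seq-len x feature-dim
conv = nn.Conv1d(in_channels=768, out_channels=256, kernel_size=5)
# conv(x) would fail: Conv1d reads dimension 1 as channels, and here
# dimension 1 is the sequence length (64), not the feature dimension (768)
print(conv(x.permute(0, 2, 1)).shape)  # torch.Size([256, 256, 60])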
In order to apply 1-d convolution along the seq-len dimension, you will first need to permute x to be of shape batch x dim x len:

x = x.permute(0, 2, 1)
Now you can apply nn.Conv1d, where in_channels is the feature dimension of x (= 768). The out_channels is up to you: it is going to be the hidden dimension of your model.
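Putting it together: with a sequence length of 64, the conv output has length 64 - 5 + 1 = 60, and max-pooling over that whole length leaves x with shape batch x 256 x 1. The classifier therefore has to take 256 features (the conv's out_channels), not hidden_size*2; that mismatch is exactly your RuntimeError, where 65536x1 is the pooled output flattened to 2-D (256*256 rows, 1 feature each) and 1536x6 is the weight of Linear(1536, 6). Below is a minimal sketch with those two fixes (a squeeze of the trailing dimension and Linear(256, 6)); the six output classes, the dropout of 0.3, and the fixed sequence length of 64 are assumptions carried over from your code:

import torch
import torch.nn as nn
from transformers import AutoModel

class MixModel(nn.Module):
    def __init__(self, pre_trained='distilbert-base-uncased'):
        super().__init__()
        self.bert = AutoModel.from_pretrained(pre_trained)
        self.hidden_size = self.bert.config.hidden_size  # 768
        # in_channels = BERT's feature dim; out_channels = the conv's hidden dim
        self.conv = nn.Conv1d(in_channels=self.hidden_size, out_channels=256,
                              kernel_size=5, padding='valid', stride=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=64 - 5 + 1)  # pools over the whole conv output length
        self.dropout = nn.Dropout(0.3)
        self.clf = nn.Linear(256, 6)  # 256 = conv out_channels, not hidden_size*2

    def forward(self, inputs, mask, labels=None):
        cls_hs = self.bert(input_ids=inputs, attention_mask=mask, return_dict=False)
        x = cls_hs[0]            # (batch, 64, 768)
        x = x.permute(0, 2, 1)   # (batch, 768, 64)
        x = self.conv(x)         # (batch, 256, 60)
        x = self.relu(x)
        x = self.pool(x)         # (batch, 256, 1)
        x = x.squeeze(-1)        # (batch, 256)
        x = self.dropout(x)
        x = self.clf(x)          # (batch, 6)
        return x

# smoke test with dummy token ids (30522 is distilbert-base-uncased's vocab size)
model = MixModel()
ids = torch.randint(0, 30522, (2, 64))
out = model(ids, torch.ones_like(ids))
print(out.shape)  # torch.Size([2, 6])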