I am writing code to train a BERT model on my dataset, but when I run it, it throws an error in the average-pooling layer. I am unable to understand what causes this error.
class BERTBaseUncased(nn.Module):
    """BERT encoder followed by a mean+max pooled single-output linear head.

    The per-token hidden states produced by BERT are reduced over dimension 1
    with both an average and a maximum; the two pooled vectors are
    concatenated, passed through dropout, and projected to one value per
    example.
    """

    def __init__(self, bert_path):
        """Load the pretrained encoder and build the pooling head.

        Args:
            bert_path: Model name or directory handed to
                ``transformers.BertModel.from_pretrained``.
        """
        super().__init__()
        self.bert_path = bert_path
        self.bert = transformers.BertModel.from_pretrained(self.bert_path)
        self.bert_drop = nn.Dropout(0.3)
        # The head consumes the mean-pooled and max-pooled vectors side by
        # side, hence twice the encoder width (768 * 2 for bert-base).
        self.out = nn.Linear(self.bert.config.hidden_size * 2, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and return one value per example.

        Args:
            ids: Input token ids, passed to BERT as its first argument.
            mask: Attention mask, forwarded as ``attention_mask``.
            token_type_ids: Segment ids, forwarded as ``token_type_ids``.

        Returns:
            The output of the final linear layer (last dimension of size 1).
        """
        outputs = self.bert(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
        )
        # Take the hidden states by index rather than tuple-unpacking.
        # Newer transformers releases return a dict-like ModelOutput, and
        # unpacking that iterates its *keys* (strings), which is what made
        # torch.mean() fail with "got (str, int)". Integer indexing works for
        # both the legacy tuple return and ModelOutput objects.
        sequence_output = outputs[0]

        # NOTE(review): pooling runs over every position in dim 1 and does not
        # use `mask`, so padded positions contribute too — confirm intended.
        apool = torch.mean(sequence_output, dim=1)
        mpool, _ = torch.max(sequence_output, dim=1)
        cat = torch.cat((apool, mpool), dim=1)

        bo = self.bert_drop(cat)
        return self.out(bo)
Exception in device=TPU:0: mean() received an invalid combination of arguments - got (str, int), but expected one of:
* (Tensor input, *, torch.dtype dtype)
* (Tensor input, tuple of names dim, bool keepdim, *, torch.dtype dtype, Tensor out)
* (Tensor input, tuple of ints dim, bool keepdim, *, torch.dtype dtype, Tensor out)
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 228, in _start_fn
fn(gindex, *args)
File "<ipython-input-12-94e926c1f4df>", line 4, in _mp_fn
a = _run()
File "<ipython-input-5-ef9fa564682f>", line 146, in _run
train_loop_fn(para_loader.per_device_loader(device), model, optimizer, device, scheduler=scheduler)
File "<ipython-input-5-ef9fa564682f>", line 22, in train_loop_fn
token_type_ids=token_type_ids
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 577, in __call__
result = self.forward(*input, **kwargs)
File "<ipython-input-11-9196e0d23668>", line 73, in forward
apool = torch.mean(o1, 1)
TypeError: mean() received an invalid combination of arguments - got (str, int), but expected one of:
* (Tensor input, *, torch.dtype dtype)
* (Tensor input, tuple of names dim, bool keepdim, *, torch.dtype dtype, Tensor out)
* (Tensor input, tuple of ints dim, bool keepdim, *, torch.dtype dtype, Tensor out)
I am trying to run this on a Kaggle TPU. How can I fix this?
Since one of the 3.x updates, the models now return task-specific output objects (which are dictionary-like) instead of plain tuples. Unpacking such an object iterates over its keys, which is why `o1` ends up being a string. You can either force the model to return a tuple by specifying `return_dict=False`:
o1, _ = self.bert(
ids,
attention_mask=mask,
token_type_ids=token_type_ids,
return_dict=False)
or use the returned `BaseModelOutputWithPoolingAndCrossAttentions` object directly:
o = self.bert(
ids,
attention_mask=mask,
token_type_ids=token_type_ids)
#you can view the other attributes with o.keys()
o1 = o.last_hidden_state