i get NotImplementedError when try to use Self-Attention on YOLO.
class BertSelfAttention(nn.Module): def init(self, config): super().init() assert config["hidden_size"] % config["num_of_attention_heads"] == 0, "The hidden size is not a multiple of the number of attention heads"
self.num_attention_heads = config['num_of_attention_heads']
self.attention_head_size = int(config['hidden_size'] / config['num_of_attention_heads'])
self.all_head_size = self.num_attention_heads * self.attention_head_size
self.query = nn.ModuleList([nn.Linear(config['hidden_size'], self.all_head_size)])
self.key = nn.ModuleList([nn.Linear(config['hidden_size'], self.all_head_size)])
self.value = nn.ModuleList([nn.Linear(config['hidden_size'], self.all_head_size)])
self.dense = nn.ModuleList([nn.Linear(config['hidden_size'], config['hidden_size'])])
def transpose_for_scores(self, x):
new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
x = x.view(*new_x_shape)
return x.permute(0, 2, 1, 3)
def forward(self, hidden_states):
# mixed_query_layer = self.query(hidden_states) # [Batch_size x Seq_length x Hidden_size]
# mixed_key_layer = self.key(hidden_states) # [Batch_size x Seq_length x Hidden_size]
# mixed_value_layer = self.value(hidden_states) # [Batch_size x Seq_length x Hidden_size]
size = hidden_states.size(dim=1)
query_layer = self.transpose_for_scores(self.query(torch.randn(16,size))) # [Batch_size x Num_of_heads x Seq_length x Head_size]
key_layer = self.transpose_for_scores(self.key(torch.randn(16,size))) # [Batch_size x Num_of_heads x Seq_length x Head_size]
value_layer = self.transpose_for_scores(self.value(torch.randn(16,size))) # [Batch_size x Num_of_heads x Seq_length x Head_size]
attention_scores = torch.matmul(query_layer, key_layer.transpose(-1,-2)) # [Batch_size x Num_of_heads x Seq_length x Seq_length]
attention_scores = attention_scores / math.sqrt(self.attention_head_size) # [Batch_size x Num_of_heads x Seq_length x Seq_length]
attention_probs = nn.Softmax(dim=-1)(attention_scores) # [Batch_size x Num_of_heads x Seq_length x Seq_length]
context_layer = torch.matmul(attention_probs, value_layer) # [Batch_size x Num_of_heads x Seq_length x Head_size]
context_layer = context_layer.permute(0, 2, 1, 3).contiguous() # [Batch_size x Seq_length x Num_of_heads x Head_size]
new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) # [Batch_size x Seq_length x Hidden_size]
context_layer = context_layer.view(*new_context_layer_shape) # [Batch_size x Seq_length x Hidden_size]
output = self.dense(context_layer)
return output
That's the result :(
Training start...
Epoch lr iou_loss dfl_loss cls_loss
0%| | 0/38 [00:11<?, ?it/s]
ERROR in training steps.
ERROR in training loop or eval/save model.
Traceback (most recent call last):
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/tools/train.py", line 143, in
main(args)
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/tools/train.py", line 133, in main
trainer.train()
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/core/engine.py", line 121, in train
self.train_one_epoch(self.epoch)
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/core/engine.py", line 135, in train_one_epoch
self.train_in_steps(epoch_num, self.step)
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/core/engine.py", line 152, in train_in_steps
preds, s_featmaps = self.model(images)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/models/yolo.py", line 36, in forward
x = self.neck(x)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/models/reppan.py", line 362, in forward
fpn_out0 = self.selfattention(x0)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/layers/common.py", line 50, in forward
mixed_query_layer = self.query(hidden_states) # [Batch_size x Seq_length x Hidden_size]
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 372, in _forward_unimplemented
raise NotImplementedError(f"Module [{type(self).name}] is missing the required "forward" function")
NotImplementedError: Module [ModuleList] is missing the required "forward" function
A ModuleList
is just a list that tracks pytorch objects/parameters. You can't call it because it has no forward method. I'm not sure why you are putting a single pytorch module inside a ModuleList
- you can just have the module on its own.
If you are looking for a pure pytorch implementation of multihead attention, you can check this implementation