I am trying to interface libtorch and OpenCV to predict classes using the YOLOv5 model. The weights file I am using is yolov5s.pt.
The source code is:
// Load the input image, wrap its pixel buffer in a tensor, run the model on it,
// and print the tensor shapes before and after the forward pass.
cv::Mat image = file->input_image(); // read image and resize into 640x640
// Wrap image.data as a {rows, cols, 3} tensor, i.e. [H, W, C], interpreted as 32-bit floats.
// NOTE(review): assumes input_image() returns 3-channel 32-bit float data; an 8-bit Mat would be misread here — confirm.
// NOTE(review): from_blob does not copy or take ownership of the buffer, so `image` must outlive `tensor` — confirm against the libtorch docs.
auto tensor = torch::from_blob(image.data, {image.rows,image.cols,3}, torch::kFloat);
// view() only relabels the dimensions as {1,640,640,3}; the channel axis remains the last one.
tensor = tensor.view({1,640,640,3});
std::cout << tensor.sizes() << std::endl;
try {
auto output = model.forward({tensor}).toTensor();
std::cout << output.sizes() << std::endl;
} catch (std::runtime_error & e) {
// Report the failure message and leave the enclosing function.
std::cerr << "[X] Error: " << e.what() << std::endl;
return;
}
Error message
RuntimeError: Given groups=1, weight of size [32, 3, 6, 6], expected input[1, 640, 640, 3] to have 3
channels, but got 640 channels instead
Traceback
Traceback of TorchScript, serialized code (most recent call last):
File "code/__torch__/models/yolo.py", line 59, in forward
model23 = self.model
_0 = getattr(model23, "0")
_25 = (_2).forward((_1).forward((_0).forward(x, ), ), )
~~~~~~~~~~~ <--- HERE
_26 = (_4).forward((_3).forward(_25, ), )
_27 = (_6).forward((_5).forward(_26, ), )
File "code/__torch__/models/common.py", line 12, in forward
act = self.act
conv = self.conv
_0 = (act).forward((conv).forward(x, ), )
~~~~~~~~~~~~~ <--- HERE
return _0
class C3(Module):
File "code/__torch__/torch/nn/modules/conv.py", line 12, in forward
bias = self.bias
weight = self.weight
x0 = torch._convolution(x, weight, bias, [2, 2], [2, 2], [1, 1], False, [0, 0], 1, False, False, True, True)
~~~~~~~~~~~~~~~~~~ <--- HERE
return x0
The solution was pretty simple. I feel rather embarrassed, but you shouldn't if you run into the same problem.
Here is the solution:
// These two lines were missing. Apply them to the 3-D [H, W, C] tensor returned by
// torch::from_blob, in place of the tensor.view({1,640,640,3}) call.
// YOLOv5 expects its input laid out as [BATCH, CHANNEL, HEIGHT, WIDTH].
tensor = tensor.permute({2, 0, 1}); // [H, W, C] -> [C, H, W]
tensor = tensor.unsqueeze(0);       // [C, H, W] -> [1, C, H, W]
std::cout << tensor.sizes() << std::endl;