Search code examples
c# .net opencv machine-learning onnxruntime

Optimizing the conversion from an OpenCV Mat/Array to an OnnxRuntime Tensor?


I am using ONNX Runtime to run inference on a UNet model, and as part of the preprocessing I have to convert an Emgu CV (OpenCV) matrix to an OnnxRuntime Tensor.

I achieved this using two nested for loops, which is unfortunately quite slow:

            // Destination tensor, indexed as [batch, channel, column, row].
            var data = new DenseTensor<float>(new[] { 1, 3, WIDTH, HEIGHT });

            // Visit every pixel once; for each one, copy the three source channels
            // in reverse order (source 2, 1, 0 -> tensor 0, 1, 2) and scale by 1/255.
            for (int row = 0; row < HEIGHT; ++row)
            {
                for (int col = 0; col < WIDTH; ++col)
                {
                    data[0, 0, col, row] = image.GetValue(2, row, col) / 255.0;
                    data[0, 1, col, row] = image.GetValue(1, row, col) / 255.0;
                    data[0, 2, col, row] = image.GetValue(0, row, col) / 255.0;
                }
            }

Then I found out that there exists a method which converts Array to DenseTensor. I wanted to use this method as follows:

        // Convert the source image to 32-bit float, scaling every value by 1/255.
        var imgToPredictFloat = new Mat(image.Height, image.Width, DepthType.Cv32F, 3);
        image.ConvertTo(imgToPredictFloat, DepthType.Cv32F, 1/255.0);
        // Reorder the colour channels in place.
        CvInvoke.CvtColor(imgToPredictFloat, imgToPredictFloat, ColorConversion.Bgra2Rgb);

        // Build the tensor from the converted float Mat (not from the original
        // `image`, which would discard the scaling and colour conversion above).
        // ToTensor<float>() must be invoked; without the parentheses `data` is a
        // method group and the Reshape call below cannot compile.
        var data = imgToPredictFloat.GetData().ToTensor<float>();
        // NOTE(review): Reshape only reinterprets the dimensions of the existing
        // buffer, so this alone does not reproduce the layout of the loop version.
        var reshaped = data.Reshape(new int[] { 1, 3, WIDTH, HEIGHT});

This would greatly improve performance; however, the layout of the resulting tensor is not correct (it is not the same as the one produced by the for loop), so the model obviously won't work. Any suggestions on how to reshape the array into the correct layout?

The code also converts the integer values 0-255 to floats in the range 0-1 and the BGR channel order to RGB.


Solution

  • This is how I have used cv::Mat with ONNX Runtime (C++):

    const wchar_t* model_path = L"C:/data/DNN/ONNX/ResNet/resnet152v2/resnet152-v2-7.onnx";
    
    printf("Using Onnxruntime C++ API\n");
    Ort::Session session(env, model_path, session_options);
    
    
    //*************************************************************************
    // print model input layer (node names, types, shape etc.)
    Ort::AllocatorWithDefaultOptions allocator;
    
    size_t num_output_nodes = session.GetOutputCount();
    std::vector<char*> outputNames;
    for (size_t i = 0; i < num_output_nodes; ++i)
    {
        char* name = session.GetOutputName(i, allocator);
        std::cout << "output: " << name << std::endl;
        outputNames.push_back(name);
    }
    
    
    // print number of model input nodes
    size_t num_input_nodes = session.GetInputCount();
    std::vector<const char*> input_node_names(num_input_nodes);
    std::vector<int64_t> input_node_dims;  // simplify... this model has only 1 input node {1, 3, 224, 224}.
                                           // Otherwise need vector<vector<>>
    
    printf("Number of inputs = %zu\n", num_input_nodes);
    
    // iterate over all input nodes
    for (int i = 0; i < num_input_nodes; i++) {
        // print input node names
        char* input_name = session.GetInputName(i, allocator);
        printf("Input %d : name=%s\n", i, input_name);
        input_node_names[i] = input_name;
    
        // print input node types
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
    
        ONNXTensorElementDataType type = tensor_info.GetElementType();
        printf("Input %d : type=%d\n", i, type);
    
        // print input shapes/dims
        input_node_dims = tensor_info.GetShape();
        printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
        for (int j = 0; j < input_node_dims.size(); j++)
            printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
    }
    
    
    cv::Size dnnInputSize;
    cv::Scalar mean;
    cv::Scalar std;
    bool rgb = true;
    
    //cv::Mat inputImage = cv::imread("C:/TestImages/kitten_01.jpg");
    cv::Mat inputImage = cv::imread("C:/TestImages/slug_01.jpg");
    
    rgb = true;
    dnnInputSize = cv::Size(224, 224);
    mean[0] = 0.485;
    mean[1] = 0.456;
    mean[2] = 0.406;
    std[0] = 0.229;
    std[1] = 0.224;
    std[2] = 0.225;
    
    cv::Mat blob;
    // ONNX: (N x 3 x H x W)
    cv::dnn::blobFromImage(inputImage, blob, 1.0 / 255.0, dnnInputSize, mean, rgb, false);
    
    size_t input_tensor_size = blob.total();
    
    std::vector<float> input_tensor_values(input_tensor_size);
    for (size_t i = 0; i < input_tensor_size; ++i)
    {
        input_tensor_values[i] = blob.at<float>(i);
    }
    std::vector<const char*> output_node_names = { outputNames.front() };
    
    // create input tensor object from data values
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
    assert(input_tensor.IsTensor());
    
    // score model & input tensor, get back output tensor
    auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
    assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
    
    // Get pointer to output tensor float values
    float* floatarr = output_tensors.front().GetTensorMutableData<float>();
    assert(abs(floatarr[0] - 0.000045) < 1e-6);
    
    cv::Mat1f result = cv::Mat1f(1000, 1, floatarr);
    
    cv::Point classIdPoint;
    double confidence = 0;
    minMaxLoc(result, 0, &confidence, 0, &classIdPoint);
    int classId = classIdPoint.y;
    std::cout << "confidence: " << confidence << std::endl;
    std::cout << "class: " << classId << std::endl;
    

    In my opinion, the only part you actually need is the conversion itself (adjust the size and the mean/std values to your network):

    cv::Mat inputImage = cv::imread("C:/TestImages/slug_01.jpg");
    
    rgb = true;
    dnnInputSize = cv::Size(224, 224);
    mean[0] = 0.485;
    mean[1] = 0.456;
    mean[2] = 0.406;
    std[0] = 0.229;
    std[1] = 0.224;
    std[2] = 0.225;
    
    cv::Mat blob;
    // ONNX: (N x 3 x H x W)
    cv::dnn::blobFromImage(inputImage, blob, 1.0 / 255.0, dnnInputSize, mean, rgb, false);