Search code examples
matrixcudathrust

Unable to compile thrust code using reduce_by_key


I need to minimum values along columns of a matrix along with row indices using thrust. I use the following code (copied from orange owl solutions), However I get errors while compiling. I have posted it as an issue on the corresponding git page. The error message is huge and i dont know how to debug it. Can anyone help me with it? I am using cuda-8.0, thrust version 1.8.

The code:

#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>

using namespace thrust::placeholders;

int main()
{
    const int Nrows = 6;
    const int Ncols = 8;

    /**************************/
    /* SETTING UP THE PROBLEM */
    /**************************/

    // --- Random uniform integer distribution between 0 and 100
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(0, 20);

    // --- Matrix allocation and initialization
    thrust::device_vector<double> d_matrix(Nrows * Ncols);
    for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);

    printf("\n\nMatrix\n");
    for(int i = 0; i < Nrows; i++) {
        std::cout << " [ ";
        for(int j = 0; j < Ncols; j++)
            std::cout << d_matrix[i * Ncols + j] << " ";
        std::cout << "]\n";
    }

    /**********************************************/
    /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
    /**********************************************/
    thrust::device_vector<float> d_minima(Ncols);
    thrust::device_vector<int> d_indices(Ncols);

    thrust::reduce_by_key(
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 / Nrows),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 / Nrows) + Nrows * Ncols,
            thrust::make_zip_iterator(
                    thrust::make_tuple(thrust::make_permutation_iterator(
                                    d_matrix.begin(),
                                    thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows) * Ncols + _1 / Nrows)),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 % Nrows))),
            thrust::make_discard_iterator(),
            thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
            thrust::equal_to<int>(),
            thrust::minimum<thrust::tuple<float, int> >()
    );

    printf("\n\n");
    for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";

    return 0;
}

Error :

/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp(58): error: ambiguous "?" operation: second operand of type "const thrust::tuple<double, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>" can be converted to third operand type "thrust::tuple<float, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>", and vice versa
          detected during:
            instantiation of "thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::result_type thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::operator()(const thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::first_argument_type &, const thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::second_argument_type &) [with FlagType=int, ValueType=thrust::tuple<double, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=thrust::minimum<thrust::tuple<float, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>]" 

Solution

  • I guess you are using this code.

    A curious characteristic of that code is that the matrix is defined using double type, but the captured minima are stored in a float vector.

    If you want to use that code as-is, according to my testing, thrust (in CUDA 10, and apparently also CUDA 8) doesn't like this line:

            thrust::minimum<thrust::tuple<float, int> >()
    

    That operator is being used to compare two items to determine which is smaller, and it is templated to accept different kinds of items. However, it has decided that finding the minimum of two of those tuples is an "ambiguous" request. Part of the reason for this is that the operator returns a float, int tuple, but is being given variously a double,int tuple or a float,int tuple.

    We can fix/work around this by passing our own functor to do the job, that is explicit in terms of handling the tuples passed to it:

    $ cat t373.cu
    #include <iterator>
    #include <algorithm>
    
    #include <thrust/device_vector.h>
    #include <thrust/iterator/counting_iterator.h>
    #include <thrust/iterator/transform_iterator.h>
    #include <thrust/iterator/permutation_iterator.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/iterator/discard_iterator.h>
    #include <thrust/reduce.h>
    #include <thrust/functional.h>
    #include <thrust/random.h>
    
    using namespace thrust::placeholders;
    struct my_min
    {
    template <typename T1, typename T2>
      __host__ __device__
      T2 operator()(T1 t1, T2 t2){
        if (thrust::get<0>(t1) < thrust::get<0>(t2)) return t1;
        return t2;
        }
    };
    
    int main()
    {
        const int Nrows = 6;
        const int Ncols = 8;
    
        /**************************/
        /* SETTING UP THE PROBLEM */
        /**************************/
    
        // --- Random uniform integer distribution between 0 and 100
        thrust::default_random_engine rng;
        thrust::uniform_int_distribution<int> dist(0, 20);
    
        // --- Matrix allocation and initialization
        thrust::device_vector<double> d_matrix(Nrows * Ncols);
        for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);
    
        printf("\n\nMatrix\n");
        for(int i = 0; i < Nrows; i++) {
            std::cout << " [ ";
            for(int j = 0; j < Ncols; j++)
                std::cout << d_matrix[i * Ncols + j] << " ";
            std::cout << "]\n";
        }
    
        /**********************************************/
        /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
        /**********************************************/
        thrust::device_vector<float> d_minima(Ncols);
        thrust::device_vector<int> d_indices(Ncols);
    
        thrust::reduce_by_key(
                thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)),
                thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)) + Nrows * Ncols,
                thrust::make_zip_iterator(
                        thrust::make_tuple(thrust::make_permutation_iterator(
                                        d_matrix.begin(),
                                        thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), ((_1 % Nrows) * Ncols + _1 / Nrows))),
                thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows)))),
                thrust::make_discard_iterator(),
                thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
                thrust::equal_to<int>(),
                my_min()
    //            thrust::minimum<thrust::tuple<float, int> >()
        );
    
        printf("\n\n");
        for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";
    
        return 0;
    }
    $ nvcc -o t373 t373.cu
    $ ./t373
    
    
    Matrix
     [ 0 1 12 18 20 3 10 8 ]
     [ 5 15 1 11 12 17 12 10 ]
     [ 18 20 15 20 6 8 18 13 ]
     [ 18 20 3 18 19 6 19 8 ]
     [ 6 10 8 16 14 11 12 1 ]
     [ 12 9 12 17 10 16 1 4 ]
    
    
    Min position = 0; Min value = 0
    Min position = 0; Min value = 1
    Min position = 1; Min value = 1
    Min position = 1; Min value = 11
    Min position = 2; Min value = 6
    Min position = 0; Min value = 3
    $
    

    I think a better fix is to just choose one or the other, either float or double. If we modify all float types to double, for example, then thrust is happy, without any other changes:

    $ cat t373a.cu
    #include <iterator>
    #include <algorithm>
    
    #include <thrust/device_vector.h>
    #include <thrust/iterator/counting_iterator.h>
    #include <thrust/iterator/transform_iterator.h>
    #include <thrust/iterator/permutation_iterator.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/iterator/discard_iterator.h>
    #include <thrust/reduce.h>
    #include <thrust/functional.h>
    #include <thrust/random.h>
    
    using namespace thrust::placeholders;
    
    int main()
    {
        const int Nrows = 6;
        const int Ncols = 8;
    
        /**************************/
        /* SETTING UP THE PROBLEM */
        /**************************/
    
        // --- Random uniform integer distribution between 0 and 100
        thrust::default_random_engine rng;
        thrust::uniform_int_distribution<int> dist(0, 20);
    
        // --- Matrix allocation and initialization
        thrust::device_vector<double> d_matrix(Nrows * Ncols);
        for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);
    
        printf("\n\nMatrix\n");
        for(int i = 0; i < Nrows; i++) {
            std::cout << " [ ";
            for(int j = 0; j < Ncols; j++)
                std::cout << d_matrix[i * Ncols + j] << " ";
            std::cout << "]\n";
        }
    
        /**********************************************/
        /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
        /**********************************************/
        thrust::device_vector<double> d_minima(Ncols);
        thrust::device_vector<int> d_indices(Ncols);
    
        thrust::reduce_by_key(
                thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)),
                thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)) + Nrows * Ncols,
                thrust::make_zip_iterator(
                        thrust::make_tuple(thrust::make_permutation_iterator(
                                        d_matrix.begin(),
                                        thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), ((_1 % Nrows) * Ncols + _1 / Nrows))),
                thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows)))),
                thrust::make_discard_iterator(),
                thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
                thrust::equal_to<int>(),
                thrust::minimum<thrust::tuple<double, int> >()
        );
    
        printf("\n\n");
        for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";
    
        return 0;
    }
    $ nvcc -o t373a t373a.cu
    $ ./t373a
    
    
    Matrix
     [ 0 1 12 18 20 3 10 8 ]
     [ 5 15 1 11 12 17 12 10 ]
     [ 18 20 15 20 6 8 18 13 ]
     [ 18 20 3 18 19 6 19 8 ]
     [ 6 10 8 16 14 11 12 1 ]
     [ 12 9 12 17 10 16 1 4 ]
    
    
    Min position = 0; Min value = 0
    Min position = 0; Min value = 1
    Min position = 1; Min value = 1
    Min position = 1; Min value = 11
    Min position = 2; Min value = 6
    Min position = 0; Min value = 3
    $
    

    I think this latter solution of using consistent types is the more sensible solution.