Search code examples
Tags: c++, cuda, thrust, gpu, reduction

N largest elements of a vector along with their indices


I have a thrust::device_vector<float> vec. Assume that vec.size() == L and that N < L. I want to find the N largest elements of vec along with their indices. How can this be done efficiently using raw CUDA or Thrust?


Solution

  • A simple solution is to sort the values in ascending order, carrying their original indices along, and then select the last N elements.

    The following example selects the N=5 largest elements and their original indices from L=18 values.

    Compile using:

    nvcc -std=c++11 nlargest.cu -o nlargest


    output when running ./nlargest

    d_values:   1   2   3   4   5   6   7   8   9   4   5   6   7   8   9   0   1   2   
    d_indices:  0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17  
    d_values:   0   1   1   2   2   3   4   4   5   5   6   6   7   7   8   8   9   9   
    d_indices:  15  0   16  1   17  2   3   9   4   10  5   11  6   12  7   13  8   14  
    d_values_s: 7   8   8   9   9   
    d_indices_s:12  7   13  8   14  
    

    nlargest.cu

    #include <thrust/device_vector.h>
    #include <thrust/sort.h>
    #include <thrust/copy.h>
    #include <thrust/sequence.h>
    #include <iostream>
    
    // Prints a container, labelling it with the expression text used at the call site.
    #define PRINTER(name) print(#name, (name))

    // Writes `name`, a colon and a tab, then every element of `v` followed by a
    // tab, and terminates the line with std::endl.
    template <template <typename...> class V, typename T, typename ...Args>
    void print(const char* name, const V<T,Args...> & v)
    {
        std::ostream_iterator<T> sink(std::cout, "\t");

        std::cout << name << ":\t";
        thrust::copy(v.begin(), v.end(), sink);
        std::cout << std::endl;
    }
    
    // Convenience wrapper: bundles the given iterators into a thrust::tuple and
    // returns the zip_iterator built from that tuple.
    template<typename... Iterators>
    __host__ __device__
    thrust::zip_iterator<thrust::tuple<Iterators...>> zip(Iterators... its)
    {
        auto packed = thrust::make_tuple(its...);
        return thrust::make_zip_iterator(packed);
    }
    
    
    // Demo: selects the select_size largest of `size` values together with the
    // positions those values had in the original array, printing each stage.
    int main()
    {

        const int size = 18;
        const int select_size = 5;

        // The selection step reads the range [end - select_size, end), so it
        // must never ask for more elements than exist.
        static_assert(select_size >= 0 && select_size <= size,
                      "select_size must be in [0, size]");

        float values[size] = {1,2,3,
                              4,5,6,
                              7,8,9,
                              4,5,6,
                              7,8,9,
                              0,1,2
        };

        // Build the device vector straight from the host array; no intermediate
        // host_vector is needed.
        thrust::device_vector<float> d_values(values, values + size);
        thrust::device_vector<int> d_indices(size);
        thrust::sequence(d_indices.begin(), d_indices.end());   // 0, 1, ..., size-1

        PRINTER(d_values);
        PRINTER(d_indices);

        // Ascending sort keyed on the values, with the indices permuted alongside.
        // The stable variant keeps equal values in their original order, i.e. by
        // ascending index, which is the same order a sort of (value, index)
        // tuples produces.
        thrust::stable_sort_by_key(d_values.begin(), d_values.end(), d_indices.begin());
        PRINTER(d_values);
        PRINTER(d_indices);

        thrust::device_vector<float> d_values_s(select_size);
        thrust::device_vector<int> d_indices_s(select_size);

        // After the ascending sort the largest elements sit at the tail; copy the
        // last select_size (value, index) pairs in one pass.
        thrust::copy(zip(d_values.end()-select_size, d_indices.end()-select_size),
                    zip(d_values.end(), d_indices.end()),
                    zip(d_values_s.begin(), d_indices_s.begin())
                    );
        PRINTER(d_values_s);
        PRINTER(d_indices_s);

        return 0;
    }