Search code examples
cudakeyuniquethrust

Count number of unique elements using thrust unique_by_key when the set of values is a tuple


I am trying to use thrust::unique by key to find unique set of values. However I am using a tuple for the values. The key is of type int, the tuple if type (float, int).

My code as below works fine to find the set of unique values. However I also need to count the number of unique values.

I have seen the thrust example but I am unable to declare the output iterator type when the value is a tuple

thrust::pair<int*,int*> new_end;
new_end = thrust::unique_by_key(thrust::host, A, A + N, B);

My code and output after detecting unique is as below. kindly tell me how to count the number of unique elements.

#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <thrust/unique.h>


int main()
{
    const int N = 8;

        thrust::device_vector<int> keys1(N);
        thrust::device_vector<int> keys2(N);
        thrust::device_vector<float> value_keys(N);

        keys1[0] = 1; keys1[1] = 1; keys1[2] = 2; keys1[3] = 2; 
        keys1[4] = 2; keys1[5] = 3; keys1[6] = 3; keys1[7] = 3; 


        keys2[0] = 4; keys2[1] = 1; keys2[2] = 7; keys2[3] = 2; 
        keys2[4] = 6; keys2[5] = 8; keys2[6] = 3; keys2[7] = 5; 

        value_keys[0] = -0.01; value_keys[1] = 1.1; value_keys[2] = -0.07; value_keys[3] = 2.1; 
        value_keys[4] = 5.2; value_keys[5] = -0.08; value_keys[6] = 3.2; value_keys[7] = 5.1; 


        thrust::unique_by_key(thrust::device, keys1.begin(), keys1.end(), 
        thrust::make_zip_iterator(thrust::make_tuple(value_keys.begin(), keys2.begin())));


    std::cout << "Unique PAIRS:"<< std::endl;
    std::cout << "keys1, value_keys, keys2:" << std::endl;
    for (int i = 0; i < N; i++) {
        std::cout << keys1[i] <<'\t' << value_keys[i] <<'\t' << keys2[i] << std::endl;
    }

}

Output:

Unique PAIRS:
keys1, value_keys, keys2:
1   -0.01   4
2   -0.07   7
3   -0.08   8
2   2.1 2
2   5.2 6
3   -0.08   8
3   3.2 3
3   5.1 5

Solution

  • Here is one possible approach:

    As you've already pointed out, thrust::unique_by_key returns a pair of iterators. A pair in thrust is something like a tuple, and we can use the thrust tuple element access mechanism (thrust::get<...>) to retrieve individual members of the pair.

    Therefore, if we retrieve the first element of the pair, this corresponds to the result end iterator for the keys supplied to the thrust::unique_by_key algorithm. We can subtract from this the begin iterator for the keys, to retrieve the length of the keys in the result (which is the same as the length of the values in the result).

    Here is a worked example:

    $ cat t390.cu
    #include <thrust/device_vector.h>
    #include <thrust/sort.h>
    #include <thrust/execution_policy.h>
    #include <thrust/unique.h>
    
    
    int main()
    {
        const int N = 8;
    
            thrust::device_vector<int> keys1(N);
            thrust::device_vector<int> keys2(N);
            thrust::device_vector<float> value_keys(N);
    
            keys1[0] = 1; keys1[1] = 1; keys1[2] = 2; keys1[3] = 2;
            keys1[4] = 2; keys1[5] = 3; keys1[6] = 3; keys1[7] = 3;
    
    
            keys2[0] = 4; keys2[1] = 1; keys2[2] = 7; keys2[3] = 2;
            keys2[4] = 6; keys2[5] = 8; keys2[6] = 3; keys2[7] = 5;
    
            value_keys[0] = -0.01; value_keys[1] = 1.1; value_keys[2] = -0.07; value_keys[3] = 2.1;
            value_keys[4] = 5.2; value_keys[5] = -0.08; value_keys[6] = 3.2; value_keys[7] = 5.1;
    
    
            auto end = thrust::unique_by_key(thrust::device, keys1.begin(), keys1.end(),
              thrust::make_zip_iterator(thrust::make_tuple(value_keys.begin(), keys2.begin())));
            int result_size = thrust::get<0>(end) - keys1.begin();
    
        std::cout << "Unique PAIRS:"<< std::endl;
        std::cout << "keys1, value_keys, keys2:" << std::endl;
        for (int i = 0; i < result_size; i++) {
            std::cout << keys1[i] <<'\t' << value_keys[i] <<'\t' << keys2[i] << std::endl;
        }
    
    }
    $ nvcc -o t390 t390.cu -std=c++11
    $ ./t390
    Unique PAIRS:
    keys1, value_keys, keys2:
    1       -0.01   4
    2       -0.07   7
    3       -0.08   8
    $
    

    If you don't want to use c++11 auto, then you could adapt the above example by defining the returned pair explicitly yourself. This is determined by looking at the two input iterator types that the thrust::unique_by_key algorithm is using, and creating a pair out of them. Here is an example:

    $ cat t390.cu
    #include <thrust/device_vector.h>
    #include <thrust/sort.h>
    #include <thrust/execution_policy.h>
    #include <thrust/unique.h>
    
    typedef thrust::zip_iterator<thrust::tuple<thrust::device_vector<float>::iterator, thrust::device_vector<int>::iterator> > my_iter;
    typedef thrust::pair<thrust::device_vector<int>::iterator, my_iter> my_pair;
    
    
    int main()
    {
        const int N = 8;
    
            thrust::device_vector<int> keys1(N);
            thrust::device_vector<int> keys2(N);
            thrust::device_vector<float> value_keys(N);
    
            keys1[0] = 1; keys1[1] = 1; keys1[2] = 2; keys1[3] = 2;
            keys1[4] = 2; keys1[5] = 3; keys1[6] = 3; keys1[7] = 3;
    
    
            keys2[0] = 4; keys2[1] = 1; keys2[2] = 7; keys2[3] = 2;
            keys2[4] = 6; keys2[5] = 8; keys2[6] = 3; keys2[7] = 5;
    
            value_keys[0] = -0.01; value_keys[1] = 1.1; value_keys[2] = -0.07; value_keys[3] = 2.1;
            value_keys[4] = 5.2; value_keys[5] = -0.08; value_keys[6] = 3.2; value_keys[7] = 5.1;
    
    
            my_pair end = thrust::unique_by_key(thrust::device, keys1.begin(), keys1.end(),
              thrust::make_zip_iterator(thrust::make_tuple(value_keys.begin(), keys2.begin())));
            int result_size = thrust::get<0>(end) - keys1.begin();
    
        std::cout << "Unique PAIRS:"<< std::endl;
        std::cout << "keys1, value_keys, keys2:" << std::endl;
        for (int i = 0; i < result_size; i++) {
            std::cout << keys1[i] <<'\t' << value_keys[i] <<'\t' << keys2[i] << std::endl;
        }
    
    }
    $ nvcc -o t390 t390.cu
    $ ./t390
    Unique PAIRS:
    keys1, value_keys, keys2:
    1       -0.01   4
    2       -0.07   7
    3       -0.08   8
    $