Search code examples
c++cudathrust

convert CUDA device interleaved array to tuple for vector operations


How do I convert a device array that contains interleaved floats to a CUDA thrust tuple for thrust vector operations.

Purpose : I generate a crude list of vertices using Marching Cubes on CUDA. The output is a list of vertices, with redundancy and no connectivity. I wish to get a list of unique vertices and then an index buffer to these unique vertices, so I can perform some operations such as mesh simplification, etc...

float *devPtr; //this is device pointer that holds an array of floats
//6 floats represent a vertex, array size is vertsCount*6*sizeof(float).
//format is [v0x, v0y, v0z, n0x, n0y, n0z, v1x, v1y, v1z, n1x, ...]

typedef thrust::tuple<float, float, float, float, float, float> MCVertex;

thrust::device_vector<MCVertex> inputVertices(vertsCount);

//copy from *devPtr to inputVertices.

//use something like unique to get rid of redundancies.
thrust::unique(inputVertices.begin(), inputVertices.end());

how do I achieve the copy, or is there some other better way of doing this?


Solution

  • There is no need to copy, you can use a combination of thrust::zip_iterator and a strided_range iterator.

    The following example works for a list of floats where 3 consecutive values belong to each other. It can of course be extended to support more than that, it is just a matter of typing.

    The first step is to load some demo data on to the GPU, this uses a thrust::device_vector, but this results in a float* pointer just like you have.

    Based on the strided_range iterator and the thrust::zip_iterator the data is first sorted and then compacted. This code uses C++11 features, so compile it using:

    nvcc -std=c++11 unique.cu -o unique
    

    The output when running ./unique is:

    1 2 3 4 5 6 
    

    unique.cu

    #include <thrust/device_vector.h>
    #include <iostream>
    #include <thrust/unique.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/sort.h>
    #include <thrust/execution_policy.h>
    
    template<typename... Iterators>
    __host__ __device__
    thrust::zip_iterator<thrust::tuple<Iterators...>> zip(Iterators... its)
    {
        return thrust::make_zip_iterator(thrust::make_tuple(its...));
    }
    
    template <typename Iterator>
    struct strided_range
    {
        typedef typename thrust::iterator_difference<Iterator>::type difference_type;
    
        struct stride_functor : public thrust::unary_function<difference_type,difference_type>
        {
            difference_type stride;
    
            stride_functor(difference_type stride)
                : stride(stride) {}
    
            __host__ __device__
            difference_type operator()(const difference_type& i) const
            { 
                return stride * i;
            }
        };
    
        typedef typename thrust::counting_iterator<difference_type>                   CountingIterator;
        typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
        typedef typename thrust::permutation_iterator<Iterator,TransformIterator>     PermutationIterator;
    
        // type of the strided_range iterator
        typedef PermutationIterator iterator;
    
        // construct strided_range for the range [first,last)
        strided_range(Iterator first, Iterator last, difference_type stride)
            : first(first), last(last), stride(stride) {}
    
        iterator begin(void) const
        {
            return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride)));
        }
    
        iterator end(void) const
        {
            return begin() + ((last - first) + (stride - 1)) / stride;
        }
    
        protected:
        Iterator first;
        Iterator last;
        difference_type stride;
    };
    
    int main()
    {
        const int stride = 3;
        const int num = 3;
    
        const int size = stride * num;
    
        float values[size] = {1,2,3,
                              4,5,6,
                              1,2,3};
    
    
        // in this example I use thrust vectors to simplify copying from host to device
        thrust::host_vector<float> h_vec (values, values+size);
        thrust::device_vector<float> d_vec = h_vec;
    
        // in your case, dev_ptr is your input pointer
        float* dev_ptr = thrust::raw_pointer_cast(d_vec.data());
    
        auto first =  strided_range<float*>(dev_ptr,   dev_ptr + size+1-stride,   stride);
        auto second = strided_range<float*>(dev_ptr+1, dev_ptr + size+1-stride+1, stride);
        auto third =  strided_range<float*>(dev_ptr+2, dev_ptr + size+1-stride+2, stride);
    
        auto zip_begin = zip(first.begin(),second.begin(), third.begin());
        auto zip_end = zip(first.end(), second.end(), third.end());
    
        thrust::sort(thrust::device, zip_begin, zip_end);
        auto new_end = thrust::unique(thrust::device, zip_begin,zip_end);
        std::size_t new_size = stride * (new_end - zip_begin);
    
        // use the underlying thrust::device_vector again to simplify printing
        thrust::copy(d_vec.begin(), d_vec.begin()+new_size, std::ostream_iterator<float>(std::cout, " "));
        std::cout << std::endl;
    
        return 0;
    }
    

    By the way: Be aware of floating point inaccuracies when trying to get unique values.


    I also created a generic version of the example above which builds the zip_iterator automatically and works for any number of consecutive elements. Since the official thrust version unfortunately does not yet support variadic tuples, we use a std::tuple to build the desired tuple type and then convert it into a thrust::tuple. If Andrew Corrigan's branch of thrust (which adds support for variadic tuples) was merged into thrust master, we could avoid using std::tuple at all.

    Compile this example using:

    nvcc generic_unique.cu -std=c++11 -o generic_unique
    

    The output when running ./generic_unique is:

    input data: 1 2 3 4 5 6 0 0 0 0 0 0 1 2 3 4 5 6 0 0 0 0 0 0 1 2 3 4 5 6 0 0 0 0 0 0 0 0 0 0 0 0 
    after sort: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6 
    after unique: 0 0 0 0 0 0 1 2 3 4 5 6 
    

    generic_unique.cu

    #include <tuple>
    #include <thrust/tuple.h>
    #include <thrust/device_vector.h>
    #include <iostream>
    #include <thrust/unique.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/sort.h>
    #include <thrust/execution_policy.h>
    
    // adapted from https://github.com/thrust/thrust/blob/master/examples/strided_range.cu
    template <typename Iterator, typename thrust::iterator_difference<Iterator>::type stride>
    class strided_range
    {
    public:
        typedef typename thrust::iterator_difference<Iterator>::type difference_type;
    
        //template <difference_type stride>
        struct stride_functor : public thrust::unary_function<difference_type,difference_type>
        {
            __host__ __device__
            difference_type operator()(const difference_type& i) const
            { 
                return stride * i;
            }
        };
    
        typedef typename thrust::counting_iterator<difference_type>                           CountingIterator;
        typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
        typedef typename thrust::permutation_iterator<Iterator,TransformIterator>             PermutationIterator;
    
        // type of the strided_range iterator
        typedef PermutationIterator iterator;
    
        // construct strided_range for the range [first,last)
        strided_range(Iterator first, Iterator last)
            : first(first), last(last) {}
    
        iterator begin(void) const
        {
            return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor()));
        }
    
        iterator end(void) const
        {
            return begin() + ((last - first) + (stride - 1)) / stride;
        }
    
    protected:
        Iterator first;
        Iterator last;
    };
    
    // copied from http://stackoverflow.com/a/16853775/678093
    template<typename, typename>
    struct append_to_type_seq { };
    
    template<typename T, typename... Ts, template<typename...> class TT>
    struct append_to_type_seq<T, TT<Ts...>>
    {
        using type = TT<Ts..., T>;
    };
    
    template<typename T, unsigned int N, template<typename...> class TT>
    struct repeat
    {
        using type = typename
            append_to_type_seq<
                T,
                typename repeat<T, N-1, TT>::type
                >::type;
    };
    
    template<typename T, template<typename...> class TT>
    struct repeat<T, 0, TT>
    {
        using type = TT<>;
    };
    
    template<typename Tuple> struct std_to_thrust_tuple;
    template<typename...T> struct std_to_thrust_tuple<std::tuple<T...>> {
      using type = thrust::tuple<T...>;
    };
    
    template<typename IteratorType, std::size_t stride>
    class zipped_strided_range
    {
    public:
    
        typedef typename strided_range<IteratorType, stride>::iterator SingleIterator;
        typedef typename repeat<SingleIterator, stride, std::tuple>::type StdIteratorTuple;
        typedef typename std_to_thrust_tuple<StdIteratorTuple>::type IteratorTuple;
        typedef decltype(thrust::make_zip_iterator(IteratorTuple())) ZipIterator;
    
        zipped_strided_range(IteratorType first, IteratorType last) : first(first), last(last)
        {
            assign<0>();
        }
    
        ZipIterator begin() const
        {
            return thrust::make_zip_iterator(begin_tuple);
        }
    
        ZipIterator end() const
        {
            return thrust::make_zip_iterator(end_tuple);
        }
    
    protected:
    
        template <std::size_t index>
        void assign(typename std::enable_if< (index < stride) >::type* = 0)
        {
            strided_range<IteratorType,stride> strided_range_iterator(first+index, last-(stride-1)+index);
    
            thrust::get<index>(begin_tuple) = strided_range_iterator.begin();
            thrust::get<index>(end_tuple) = strided_range_iterator.end();
            assign<index+1>();
        }
    
        template <std::size_t index>
        void assign(typename std::enable_if< (index == stride) >::type* = 0)
        {
            // end recursion
        }
    
        IteratorType first;
        IteratorType last;
    
        IteratorTuple begin_tuple;
        IteratorTuple end_tuple;
    };
    
    
    int main()
    {
    
        const int stride = 6;
        const int num = 6;
    
        const int size = stride * num;
    
        float values[size] = {1,2,3,4,5,6,
                              0,0,0,0,0,0,
                              1,2,3,4,5,6,
                              0,0,0,0,0,0,
                              1,2,3,4,5,6,
                              0,0,0,0,0,0
        };
    
    
        // in this example I use thrust vectors to simplify copying from host to device
        // it also simplifies printing
        thrust::host_vector<float> h_vec (values, values+size);
        thrust::device_vector<float> d_vec = h_vec;
    
        std::cout << "input data: ";
        thrust::copy(d_vec.begin(), d_vec.end(), std::ostream_iterator<float>(std::cout, " "));
        std::cout << std::endl;
    
        // in your case, dev_ptr is your input pointer
        float* dev_ptr = thrust::raw_pointer_cast(d_vec.data());
    
        zipped_strided_range<float*, stride> zipped(dev_ptr, dev_ptr+size);
    
    
        thrust::sort(thrust::device, zipped.begin(), zipped.end());
    
        std::cout << "after sort: ";
        thrust::copy(d_vec.begin(), d_vec.end(), std::ostream_iterator<float>(std::cout, " "));
        std::cout << std::endl;
    
        auto new_end = thrust::unique(thrust::device, zipped.begin(), zipped.end());
        std::size_t new_size = stride * (new_end - zipped.begin());
    
        std::cout << "after unique: ";
        d_vec.resize(new_size);
        thrust::copy(d_vec.begin(), d_vec.end(), std::ostream_iterator<float>(std::cout, " "));
        std::cout << std::endl;
    
        return 0;
    }