Search code examples
c++thrust

Unary function that takes tuple of iterators and produces basic type


I have a zip iterator that points to tuples of iterators. I want to provide thrust::transform with a functor that will grab elements using the tuple and produce a scalar output.

My program does not work and I don't know why.

I think it might have something to do with: CUDA thrust zip_iterator tuple transform_reduce, but changing the template parameters of the functor didn't do the trick.

The following code compiles:

#include <iostream>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

// Iterator type of a device_vector<double>.
typedef thrust::device_vector<double>::iterator realIter;
// Pair of such iterators; used in main() to construct the zip iterator.
typedef thrust::tuple<realIter,realIter> Tup;
// Zip iterator built over the iterator pair.
typedef thrust::zip_iterator<Tup> Zip;
// Tuple of plain doubles. I tried replacing Tup with this in the functor.
typedef thrust::tuple<double,double> Tup2;

// Functor intended to map one element of a Zip range to a double.
//
// NOTE: operator() takes a Tup&, i.e. a tuple of *iterators*, and dereferences
// its first component. When this functor is handed to thrust::transform over a
// Zip range, nvcc reports that it is called with a tuple of element references
// (double &, double &) instead, so this overload cannot be called.
struct dummyOp : public thrust::unary_function<Tup, double> {
 __host__ __device__ double operator()(Tup &tup){
   // Dereference the first iterator held in the tuple.
   double result = *thrust::get<0>(tup);
   return result;
 }
};

// Fills two 4-element device vectors, zips their begin iterators together,
// and prints the vectors plus two values read through the zip iterator.
int main(){
  thrust::device_vector<double> A(4);
  thrust::device_vector<double> B(4);
  // C is only written by the transform call below, which is commented out.
  thrust::device_vector<double> C(4);

  A[0] =  1.; A[1] = 2.;
  A[2] =  3.; A[3] = 4.;

  B[0] =  4.;   B[1] = 3.;
  B[2] =  2.;   B[3] = 1.;

  // Zip the begin iterators of A and B into a single iterator.
  Tup tup   = thrust::tuple<realIter,realIter>(A.begin(),B.begin());
  Zip zippy = thrust::zip_iterator<Tup>(tup);
  dummyOp f;

  // The following does not work:
  //thrust::transform(zippy, zippy + 4, C.begin(), f);

  // Print each vector, elements separated by a space.
  std::cout << "A:\n";
  thrust::copy(A.begin(), A.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << "\nB:\n";
  thrust::copy(B.begin(), B.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << "\nC:\n";
  thrust::copy(C.begin(), C.end(), std::ostream_iterator<double>(std::cout, " "));
  std::cout << std::endl;
  // Index the zip iterator and pick one component of the resulting tuple:
  // component 0 comes from A, component 1 from B.
  std::cout <<"get<0>(zippy[0]) returns:\n" << thrust::get<0>(zippy[0]) << std::endl;
  std::cout <<"get<1>(zippy[1]) returns:\n" << thrust::get<1>(zippy[1]) << std::endl;

  return 0;
}

And running it gives:

$ ./so2
A:
1 2 3 4
B:
4 3 2 1
C:
0 0 0 0
get<0>(zippy[0]) returns:
1
get<1>(zippy[1]) returns:
3

With the offending line uncommented, we have:

$ nvcc -arch=compute_35 so2.cu -o so2
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/internal_functional.h(322): error: function "dummyOp::operator()" cannot be called with the given argument list
            argument types are: (thrust::detail::tuple_of_iterator_references<double &, double &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>)
            object type is: dummyOp
          detected during:
            instantiation of "thrust::detail::enable_if_non_const_reference_or_tuple_of_iterator_references<thrust::tuple_element<1, Tuple>::type>::type thrust::detail::unary_transform_functor<UnaryFunction>::operator()(Tuple) [with UnaryFunction=dummyOp, Tuple=thrust::detail::tuple_of_iterator_references<thrust::detail::tuple_of_iterator_references<double &, double &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, double &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/function.h(60): here
            instantiation of "Result thrust::detail::wrapped_function<Function, Result>::operator()(const Argument &) const [with Function=thrust::detail::unary_transform_functor<dummyOp>, Result=void, Argument=thrust::detail::tuple_of_iterator_references<thrust::detail::tuple_of_iterator_references<thrust::device_reference<double>, thrust::device_reference<double>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, thrust::device_reference<double>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/for_each.inl(57): here
            instantiation of "void thrust::system::cuda::detail::for_each_n_detail::for_each_kernel::operator()(thrust::system::cuda::detail::bulk_::parallel_group<thrust::system::cuda::detail::bulk_::concurrent_group<thrust::system::cuda::detail::bulk_::agent<1UL>, 0UL>, 0UL> &, Iterator, Function, Size) [with Iterator=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Function=thrust::detail::wrapped_function<thrust::detail::unary_transform_functor<dummyOp>, void>, Size=unsigned int]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp(71): here
            instantiation of "void thrust::system::cuda::detail::bulk_::detail::apply_from_tuple(Function, const thrust::tuple<Arg1, Arg2, Arg3, Arg4, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> &) [with Function=thrust::system::cuda::detail::for_each_n_detail::for_each_kernel, Arg1=thrust::system::cuda::detail::bulk_::parallel_group<thrust::system::cuda::detail::bulk_::concurrent_group<thrust::system::cuda::detail::bulk_::agent<1UL>, 0UL>, 0UL> &, Arg2=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Arg3=thrust::detail::wrapped_function<thrust::detail::unary_transform_functor<dummyOp>, void>, Arg4=unsigned int]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/detail/closure.hpp(50): here
            instantiation of "void thrust::system::cuda::detail::bulk_::detail::closure<Function, Tuple>::operator()() [with Function=thrust::system::cuda::detail::for_each_n_detail::for_each_kernel, Tuple=thrust::tuple<thrust::system::cuda::detail::bulk_::parallel_group<thrust::system::cuda::detail::bulk_::concurrent_group<thrust::system::cuda::detail::bulk_::agent<1UL>, 0UL>, 0UL> &, thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, thrust::detail::wrapped_function<thrust::detail::unary_transform_functor<dummyOp>, void>, unsigned int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp(58): here
            [ 9 instantiation contexts not shown ]
            instantiation of "RandomAccessIterator thrust::system::cuda::detail::for_each(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, RandomAccessIterator, RandomAccessIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, RandomAccessIterator=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, UnaryFunction=thrust::detail::unary_transform_functor<dummyOp>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/for_each.inl(44): here
            instantiation of "InputIterator thrust::for_each(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=thrust::zip_iterator<thrust::tuple<Zip, thrust::detail::normal_iterator<thrust::device_ptr<double>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, UnaryFunction=thrust::detail::unary_transform_functor<dummyOp>]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/detail/generic/transform.inl(57): here
            instantiation of "OutputIterator thrust::system::detail::generic::transform(thrust::execution_policy<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=Zip, OutputIterator=thrust::detail::normal_iterator<thrust::device_ptr<double>>, UnaryFunction=dummyOp]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/transform.inl(44): here
            instantiation of "OutputIterator thrust::transform(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=Zip, OutputIterator=thrust::detail::normal_iterator<thrust::device_ptr<double>>, UnaryFunction=dummyOp]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/transform.inl(142): here
            instantiation of "OutputIterator thrust::transform(InputIterator, InputIterator, OutputIterator, UnaryFunction) [with InputIterator=Zip, OutputIterator=thrust::detail::normal_iterator<thrust::device_ptr<double>>, UnaryFunction=dummyOp]"
so2.cu(36): here

Update: While I would still really like help understanding the problem in the code above, the following works:

// Variant of the functor whose operator() is a template: the tuple type is
// deduced at the call site instead of being fixed to Tup.
struct dummyOp {
 template <typename Tuple>
 // The tuple is taken by value, and its first component is read directly
 // (no dereference, unlike the original version).
 __host__ __device__ double operator()(Tuple tup){
   double result = thrust::get<0>(tup);
   return result;
 }
};

The idea was stolen from here: https://github.com/thrust/thrust/blob/master/examples/arbitrary_transformation.cu

This does not constitute an answer to my own question because I still do not understand what is wrong with the types in the original code.

Also: it does not work to make the argument an explicit reference (Tuple &). Does that mean that tup is passed by value?


Solution

  • Generally speaking, when a zip iterator is dereferenced as part of a thrust algorithm, it produces a tuple of the underlying elements (or references to them), not a tuple of iterators or pointers, and that tuple is what is passed to the functor in question.

    When we analyze the compiler output from your code with the "offending line":

    argument types are: (thrust::detail::tuple_of_iterator_references<double &, double &, 
    

    we observe that when thrust dereferences the zippy zip iterator, it produces a tuple of references to double items. We can use this to inform ourselves of the input type expected for the functor, namely:

    thrust::tuple<double &, double &>
    

    Since these are references to elementary types, it's not necessary for us to dereference these (as if they were pointers, or iterators) in the functor to take their value.

    The following modified code incorporates these ideas and compiles without error:

    $ cat t4.cu
    #include <iostream>
    #include <thrust/transform.h>
    #include <thrust/functional.h>
    #include <thrust/device_vector.h>
    #include <thrust/device_ptr.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/tuple.h>
    
    // Iterator type of a device_vector<double>.
    typedef thrust::device_vector<double>::iterator realIter;
    // Pair of such iterators; used in main() to construct the zip iterator.
    typedef thrust::tuple<realIter,realIter> Tup;
    // Zip iterator built over the iterator pair.
    typedef thrust::zip_iterator<Tup> Zip;
    // Tuple of references to double: the argument type used by the functor below.
    typedef thrust::tuple<double &,double &> Tup2;
    
    // Functor mapping one element of a Zip range to a double.
    // It takes a tuple of references to double (Tup2) and returns the value of
    // the first component.
    struct dummyOp : public thrust::unary_function<Tup2, double> {
     __host__ __device__ double operator()(Tup2 &tup){
       // The component is already a reference to a double, so it is read
       // directly rather than dereferenced.
       double result = thrust::get<0>(tup);
       return result;
     }
    };
    
    // Fills two 4-element device vectors, zips their begin iterators together,
    // runs dummyOp over the zipped range into C, and prints the results.
    int main(){
      thrust::device_vector<double> A(4);
      thrust::device_vector<double> B(4);
      thrust::device_vector<double> C(4);
    
      A[0] =  1.; A[1] = 2.;
      A[2] =  3.; A[3] = 4.;
    
      B[0] =  4.;   B[1] = 3.;
      B[2] =  2.;   B[3] = 1.;
    
      // Zip the begin iterators of A and B into a single iterator.
      Tup tup   = thrust::tuple<realIter,realIter>(A.begin(),B.begin());
      Zip zippy = thrust::zip_iterator<Tup>(tup);
      dummyOp f;
    
      // With dummyOp taking a tuple of references, this call now compiles.
      // It stores the first component of each zipped pair (A's value) in C.
      thrust::transform(zippy, zippy + 4, C.begin(), f);
    
      // Print each vector, elements separated by a space.
      std::cout << "A:\n";
      thrust::copy(A.begin(), A.end(), std::ostream_iterator<double>(std::cout, " "));
      std::cout << "\nB:\n";
      thrust::copy(B.begin(), B.end(), std::ostream_iterator<double>(std::cout, " "));
      std::cout << "\nC:\n";
      thrust::copy(C.begin(), C.end(), std::ostream_iterator<double>(std::cout, " "));
      std::cout << std::endl;
      // Index the zip iterator and pick one component of the resulting tuple:
      // component 0 comes from A, component 1 from B.
      std::cout <<"get<0>(zippy[0]) returns:\n" << thrust::get<0>(zippy[0]) << std::endl;
      std::cout <<"get<1>(zippy[1]) returns:\n" << thrust::get<1>(zippy[1]) << std::endl;
    
      return 0;
    }
    $ nvcc -arch=sm_61 -o t4 t4.cu
    $ ./t4
    A:
    1 2 3 4
    B:
    4 3 2 1
    C:
    1 2 3 4
    get<0>(zippy[0]) returns:
    1
    get<1>(zippy[1]) returns:
    3
    $
    

    As an alternative, templating the functor operator on the tuple type works, of course, because the compiler deduces the necessary type specifics and instantiates an appropriate version of the functor operator.