Search code examples
cudathrust

Expand and increment data by map count


I'm quite new to thrust (cuda), and am finding something challenging.


(Edited question to be simplified) I have an input vector and a map:

 vector = [8,23,46,500,2,7,91,91]
    map = [1, 0, 4,  3,1,0, 5, 3]

I want to expand this and increment the values to become:

new_vec = [8,46,47,48,49,500,501,502,2,91,92,93,94,95,91,92,93]
  • I realise the thrust/examples/expand.cu example already mostly does this, but I don't know how to efficiently increment the data value by the map count.
  • It would be helpful if someone could explain how to modify this example to achieve this.

Solution

  • Adapt the Thrust expand example to use exclusive_scan_by_key to rank each output element within its subsequence and then increment by that rank:

    #include <thrust/device_vector.h>
    #include <thrust/reduce.h>
    #include <thrust/gather.h>
    #include <thrust/scan.h>
    #include <thrust/fill.h>
    #include <thrust/copy.h>
    #include <thrust/iterator/constant_iterator.h>
    #include <thrust/functional.h>
    #include <iterator>
    #include <iostream>
    
    
    template<typename Vector>
    void print(const std::string& s, const Vector& v)
    {
      typedef typename Vector::value_type T;
    
      std::cout << s;
      thrust::copy(v.begin(), v.end(), std::ostream_iterator<T>(std::cout, " "));
      std::cout << std::endl;
    }
    
    
    template<typename InputIterator1,
             typename InputIterator2,
             typename OutputIterator>
    void expand_and_increment(InputIterator1 first1,
                              InputIterator1 last1,
                              InputIterator2 first2,
                              OutputIterator output)
    {
      typedef typename thrust::iterator_difference<InputIterator1>::type difference_type;
    
      difference_type input_size  = thrust::distance(first1, last1);
      difference_type output_size = thrust::reduce(first1, last1);
    
      // scan the counts to obtain output offsets for each input element
      thrust::device_vector<difference_type> output_offsets(input_size);
      thrust::exclusive_scan(first1, last1, output_offsets.begin()); 
    
      print("output_offsets ", output_offsets);
    
      // scatter the nonzero counts into their corresponding output positions
      thrust::device_vector<difference_type> output_indices(output_size);
      thrust::scatter_if
        (thrust::counting_iterator<difference_type>(0),
         thrust::counting_iterator<difference_type>(input_size),
         output_offsets.begin(),
         first1,
         output_indices.begin());
    
      // compute max-scan over the output indices, filling in the holes
      thrust::inclusive_scan
        (output_indices.begin(),
         output_indices.end(),
         output_indices.begin(),
         thrust::maximum<difference_type>());
    
      print("output_indices ", output_indices);
    
      // gather input values according to index array (output = first2[output_indices])
      OutputIterator output_end = output; thrust::advance(output_end, output_size);
      thrust::gather(output_indices.begin(),
                     output_indices.end(),
                     first2,
                     output);
    
      // rank output_indices
      thrust::device_vector<difference_type> ranks(output_size);
      thrust::exclusive_scan_by_key(output_indices.begin(), output_indices.end(),
                                    thrust::make_constant_iterator<difference_type>(1),
                                    ranks.begin());
    
      print("ranks ", ranks);
    
      // increment output by ranks
      thrust::transform(output, output + output_size, ranks.begin(), output, thrust::placeholders::_1 + thrust::placeholders::_2);
    }
    
    
    int main(void)
    {
      int values[] = {8,23,46,500,2,7,91,91};
      int counts[] = {1, 0, 4,  3,1,0, 5, 3};
    
      size_t input_size  = sizeof(counts) / sizeof(int);
      size_t output_size = thrust::reduce(counts, counts + input_size);
    
      // copy inputs to device
      thrust::device_vector<int> d_counts(counts, counts + input_size);
      thrust::device_vector<int> d_values(values, values + input_size);
      thrust::device_vector<int> d_output(output_size);
    
      // expand values according to counts
      expand_and_increment(d_counts.begin(), d_counts.end(),
                           d_values.begin(),
                           d_output.begin());
    
      std::cout << "Expanding and incrementing values according to counts" << std::endl;
      print(" counts ", d_counts);
      print(" values ", d_values);
      print(" output ", d_output);
    
      return 0;
    }
    

    The output:

    $ nvcc expand_and_increment.cu -run
    output_offsets 0 1 1 5 8 9 9 14 
    output_indices 0 2 2 2 2 3 3 3 4 6 6 6 6 6 7 7 7 
    ranks 0 0 1 2 3 0 1 2 0 0 1 2 3 4 0 1 2 
    Expanding and incrementing values according to counts
     counts 1 0 4 3 1 0 5 3 
     values 8 23 46 500 2 7 91 91 
     output 8 46 47 48 49 500 501 502 2 91 92 93 94 95 91 92 93