Search code examples
cudagpunumerical-methodscartesian

Generate Cartesian Product using CUDA on GPU


I would like to know a method to generate Cartesian product using CUDA on GPU.
Simple case: We have two lists:

 A = {0.0, 0.1, 0.2}   B = {0.0, 0.1, 0.2}
 A x B = C = { {0.0, 0.0}, {0.0, 0.1}, {0.0, 0.2}, {0.1, 0.0}, {0.1, 0.1} ...}

How can I generate (list of list) C in GPU? How can this be done for N lists with M values each.

The terminology that I am using might be incorrect. I can try explaining what I mean:
I am essentially trying to generate a truth table: a binary truth table would look like

A binary truth table would look like

A     B
0     0
0     1
1     0
1     1

where A has two values {0, 1} and B has {0, 1}. In my case A and B has more than two values, for starters 31 values (0 - 30). For every value in set A, I have 31 values in set B, I need to enumerate them and store them in memory.

Other than that, i need to extend the algorithm to N list instead of 2 lists (A and B)


Solution

  • I don't claim this is efficient; just functional:

    #include <thrust/device_vector.h>
    #include <thrust/pair.h>
    #include <thrust/copy.h>
    #include <iterator>
    
    __global__ void cartesian_product(const int *a, size_t a_size,
                                      const int *b, size_t b_size,
                                      thrust::pair<int,int> *c)
    {
      unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    
      if(idx < a_size * b_size)
      {
        unsigned int a_idx = idx / a_size;
        unsigned int b_idx = idx % a_size;
    
        c[idx] = thrust::make_pair(a[a_idx], b[b_idx]);
      }
    }
    
    int main()
    {
      thrust::device_vector<int> a(3);
      a[0] = 0; a[1] = 1; a[2] = 2;
    
      thrust::device_vector<int> b(3);
      b[0] = 0; b[1] = 1; b[2] = 2;
    
      thrust::device_vector<thrust::pair<int,int> > c(a.size() * b.size());
    
      unsigned int block_size = 256;
      unsigned int num_blocks = (c.size() + (block_size - 1)) / block_size;
    
      cartesian_product<<<num_blocks, block_size>>>(thrust::raw_pointer_cast(a.data()), a.size(),
                                                    thrust::raw_pointer_cast(b.data()), b.size(),
                                                    thrust::raw_pointer_cast(c.data()));
    
      std::cout << "a: { ";
      thrust::copy(a.begin(), a.end(), std::ostream_iterator<int>(std::cout, ", "));
      std::cout << "}" << std::endl;
    
      std::cout << "b: { ";
      thrust::copy(b.begin(), b.end(), std::ostream_iterator<int>(std::cout, ", "));
      std::cout << "}" << std::endl;
    
      std::cout << "c: { ";
      for(unsigned int i = 0; i < c.size(); ++i)
      {
        thrust::pair<int,int> x = c[i];
        std::cout << "(" << x.first << ", " << x.second << "), ";
      }
      std::cout << "}" << std::endl;
    
      return 0;
    }
    

    The program's output:

    $ nvcc cartesian_product.cu -run
    a: { 0, 1, 2, }
    b: { 0, 1, 2, }
    c: { (0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2), }