Search code examples
error-handling, cuda, runtime-error, thrust

Cuda::Thrust error saying "terminate called after throwing an instance of 'thrust::system::system_error'"


This is the code I wrote:

#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/sequence.h>
#include <thrust/transform.h>
#include <thrust/reduce.h>
#include <thrust/replace.h>
#include <thrust/copy.h>
#include <thrust/functional.h>
#include <iostream>
#include <cmath> // std::sqrt
#include <cstdlib> // std::rand

using namespace thrust;

// Binary functor: returns the squared difference (x - y)^2 of its two
// arguments. Callable from both host and device code, so it can be handed to
// an algorithm such as transform() over two input ranges.
template <class K>
struct sum_square_functor{
    __host__ __device__ K operator()(const K& x, const K& y)const{
        // Square by multiplying rather than calling pow(diff, 2): pow promotes
        // float operands to double and goes through a general-purpose power
        // routine, which is needlessly slow for a fixed exponent of 2.
        const K diff = x - y;
        return diff * diff;
    }
};

//Test code on CPU
//void perform_euclidean(){
//
//}


int main(){
    device_vector<float> p_vec(1 << 20);
    device_vector<float> q_vec(1 << 20);
    device_vector<float> r_vec(1 << 20);
    generate(p_vec.begin(), p_vec.end(), rand);
    generate(q_vec.begin(), q_vec.end(), rand);
    // Current Thrust's transformations supports 2 input vectors, so we use it
    transform(p_vec.begin(), p_vec.end(), q_vec.begin(), r_vec.begin(), sum_square_functor<float>());

    int sum = thrust::reduce(r_vec.begin(), r_vec.end(), (int)0, thrust::plus<float>());
    std::cout << "sqrt(" << sum  << ")=" << sqrt(sum) << std::endl;
    return 0;
}

and the full error message is:

terminate called after throwing an instance of 'thrust::system::system_error'
  what():  unspecified launch failure

What is wrong with the code? Any ideas?

I've found that the error is caused by the generate() calls, but I still cannot get rid of it.


Solution

  • rand is a host-library based function. You cannot use it directly in device code. When you attempt to generate using rand on a device vector, you are creating a device code kernel that is trying to use rand directly and it is failing.

    Instead, create those vectors on the host and copy them to the device, or else use a device compatible random generator (thrust has some).

    You should be able to do:

    // Generate the random inputs on the host, where rand() can be called.
    host_vector<float> h_p_vec(1 << 20);
    generate(h_p_vec.begin(), h_p_vec.end(), rand);
    host_vector<float> h_q_vec(1 << 20);
    generate(h_q_vec.begin(), h_q_vec.end(), rand);

    // Constructing a device_vector from a host_vector copies the data to the device.
    device_vector<float> p_vec(h_p_vec);
    device_vector<float> q_vec(h_q_vec);