Here is the code I wrote:
#include <thrust/system_error.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/sequence.h>
#include <thrust/transform.h>
#include <thrust/reduce.h>
#include <thrust/replace.h>
#include <thrust/copy.h>
#include <thrust/functional.h>
#include <cmath> // std::sqrt
#include <cstdlib> // rand
#include <iostream>
using namespace thrust;
// Binary functor returning the squared difference (x - y)^2 of its arguments.
//
// Callable from both host and device code, so it can be passed to Thrust
// algorithms such as thrust::transform with two input ranges.
//
// Template parameter:
//   K - arithmetic element type; it is both the argument and the result type.
template <class K>
struct sum_square_functor {
    __host__ __device__ K operator()(const K& x, const K& y) const {
        // Multiply instead of calling pow(): the result stays in type K
        // (no promotion to double) and avoids a general-purpose pow() call
        // for what is a single multiplication.
        const K diff = x - y;
        return diff * diff;
    }
};
//Test code on CPU
//void perform_euclidean(){
//
//}
int main(){
device_vector<float> p_vec(1 << 20);
device_vector<float> q_vec(1 << 20);
device_vector<float> r_vec(1 << 20);
generate(p_vec.begin(), p_vec.end(), rand);
generate(q_vec.begin(), q_vec.end(), rand);
// Current Thrust's transformations supports 2 input vectors, so we use it
transform(p_vec.begin(), p_vec.end(), q_vec.begin(), r_vec.begin(), sum_square_functor<float>());
int sum = thrust::reduce(r_vec.begin(), r_vec.end(), (int)0, thrust::plus<float>());
std::cout << "sqrt(" << sum << ")=" << sqrt(sum) << std::endl;
return 0;
}
The full error message is:
terminate called after throwing an instance of 'thrust::system::system_error'
what(): unspecified launch failure
What is wrong with the code? Any ideas?
I've found that the error is caused by the generate() calls,
but I still cannot get rid of the error.
rand is a host-library function, so you cannot use it directly in device code. When you call generate with rand on a device vector, Thrust creates a device kernel that tries to call rand directly, and that kernel fails.
Instead, create those vectors on the host and copy them to the device, or else use a device-compatible random number generator (Thrust provides some in thrust/random.h).
You should be able to do:
// Fill the inputs on the host, where rand() is callable, then copy each
// host vector into its device counterpart.
const int kCount = 1 << 20;
host_vector<float> host_p(kCount);
generate(host_p.begin(), host_p.end(), rand);
host_vector<float> host_q(kCount);
generate(host_q.begin(), host_q.end(), rand);
device_vector<float> p_vec(host_p);
device_vector<float> q_vec(host_q);