I've been looking at Thrust and I stumbled upon a question that almost (but not quite) answered mine: Finding the maximum element value AND its position using CUDA Thrust
The example posted in the answer there works fine, but how can I do the same thing with raw pointers? Let us assume the following code, which I believe to be correct (ignore the kernel configuration; it is kept simple on purpose):
// Raw device pointer that will own the buffer searched below.
float* d_A;
// Number of float elements in the buffer.
const unsigned int noElems = 10;
// Allocate room for noElems floats in device memory.
cudaMalloc(&d_A, noElems * sizeof(float));
// Fill the buffer on the device: one block, one thread per element.
initDeviceVector<<<1, noElems>>>(d_A);
// Wrap the raw pointer so Thrust algorithms treat it as device memory.
thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);
// Search [d_ptr, d_ptr + noElems) for the largest value; the result refers to
// the element itself, not to its index.
thrust::device_vector<float>::iterator iter =
thrust::max_element(d_ptr, d_ptr + noElems);
I can't quite figure out how to extract the position using iter and raw pointers.
Thank you for your time.
There are probably a number of ways to do this. However, working directly from your code, we can compare the value of iter to a device_ptr if we convert it to a suitable device pointer first.
The following fully worked example demonstrates this:
$ cat t436.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/extrema.h>
#include <stdio.h>
// Writes (i mod 7), converted to float, into data[i], where i is the calling
// thread's global index in a 1D launch.
// There is no bounds check: the launch must not create more threads than
// data has elements.
__global__ void initDeviceVector(float *data){
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    const int value = gid % 7;
    data[gid] = value;
}
// Demonstrates how to obtain the index of the maximum element when the data
// lives behind a raw device pointer rather than a thrust::device_vector.
//
// Returns 0 after printing "pos = <index>", or 1 if a CUDA runtime call or
// the kernel launch fails (the error is reported on stderr).
int main(){
    const unsigned int noElems = 10;
    float* d_A = NULL;

    // Check the allocation instead of handing a bad pointer to the kernel.
    cudaError_t err = cudaMalloc(&d_A, noElems * sizeof(float));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // One block with one thread per element; the kernel has no bounds guard.
    initDeviceVector<<<1, noElems>>>(d_A);
    // Launch errors are only visible through cudaGetLastError(); faults during
    // execution surface at the next synchronizing call.
    err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "initDeviceVector failed: %s\n", cudaGetErrorString(err));
        cudaFree(d_A);
        return 1;
    }

    // Wrap the raw pointer so Thrust dispatches to the device backend.
    thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);
    thrust::device_vector<float>::iterator iter = thrust::max_element(d_ptr, d_ptr + noElems);

    // Convert the iterator back to a device_ptr; subtracting the base pointer
    // then yields the element offset of the maximum.
    int pos = static_cast<int>(thrust::device_pointer_cast(&(iter[0])) - d_ptr);
    printf("pos = %d\n", pos);

    // Release the device buffer (the original leaked it).
    cudaFree(d_A);
    return 0;
}
$ nvcc -arch=sm_20 -o t436 t436.cu
$ ./t436
pos = 6
$