index of max_element with raw pointers

I've been looking at Thrust and I stumbled upon a question that almost (but not quite) answered mine: Finding the maximum element value AND its position using CUDA Thrust

The example posted in the answer there works fine, but how can I do the same thing with raw pointers? Consider the following code, which I believe to be correct (ignore the kernel configuration; it is kept simple on purpose):

// Raw device buffer holding noElems floats.
float* d_A;
const unsigned int noElems = 10;
cudaMalloc(&d_A, noElems * sizeof(float));
// Fill the buffer on the device: one block of noElems threads.
initDeviceVector<<<1, noElems>>>(d_A);

// Wrap the raw pointer in a thrust::device_ptr so it can be passed to Thrust algorithms.
thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);     
// iter refers to the maximum element found in [d_ptr, d_ptr + noElems).
thrust::device_vector<float>::iterator iter = 
    thrust::max_element(d_ptr, d_ptr + noElems);

I can't quite figure out how to extract the position using iter and raw pointers.

Thank you for your time.

Solution

There are probably a number of ways to do this. However, working directly from your code, we can compare the value of iter to a device_ptr if we convert it to a suitable device pointer first; subtracting the base device_ptr from the converted pointer then gives the position.

The following fully worked example demonstrates this:

$ cat t436.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/extrema.h>
#include <stdio.h>


// Each thread writes (its global 1D thread index modulo 7) into the slot of
// `data` at that same index.
// There is no bounds check: the launch must not create more threads than
// `data` has elements.
__global__ void initDeviceVector(float *data){
  const int gid = blockIdx.x*blockDim.x + threadIdx.x;
  const int pattern = gid%7;
  data[gid] = pattern;
}

  // Fills a raw device array, locates its maximum with thrust::max_element
  // and prints the zero-based position of that maximum.
  // Returns 0 on success, 1 if any CUDA runtime call or the kernel fails.
  int main(){

  float* d_A = NULL;
  const unsigned int noElems = 10;

  cudaError_t err = cudaMalloc(&d_A, noElems * sizeof(float));
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  // One thread per element: the kernel does no bounds checking, so the
  // thread count must equal the element count.
  initDeviceVector<<<1, noElems>>>(d_A);
  err = cudaGetLastError();            // launch-configuration errors
  if (err == cudaSuccess) {
    err = cudaDeviceSynchronize();     // errors raised while the kernel ran
  }
  if (err != cudaSuccess) {
    fprintf(stderr, "initDeviceVector failed: %s\n", cudaGetErrorString(err));
    cudaFree(d_A);
    return 1;
  }

  thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);
  thrust::device_vector<float>::iterator iter = thrust::max_element(d_ptr, d_ptr + noElems);

  // iter is a device_vector iterator, so take the address of the element it
  // refers to (a device pointer) before subtracting the base pointer; the
  // difference is the index of the maximum.
  int pos = static_cast<int>(thrust::device_pointer_cast(&(iter[0])) - d_ptr);

  printf("pos = %d\n", pos);

  // Release the device allocation before exiting.
  cudaFree(d_A);
  return 0;
}

$ nvcc -arch=sm_20 -o t436 t436.cu
$ ./t436
pos = 6
$