Search code examples
cuda, gpu, thrust

do thrust::min_element on arrays on the gpu


I am trying to find the minimum of an array that lives on the GPU. I can use min_element on data that is on the CPU, but I am not sure how to do the same for data on the GPU. I am also confused about why the return value of min_element has to be an array, since there is only one minimum. The code below is the closest I have come to what I think is correct, but for the min_element line I get: ' error: no suitable conversion function from "thrust::device_ptr<double>" to "double *" exists '.

code:

#include <stdio.h>
#include <stdlib.h> /* for rand() */
#include <unistd.h> /* for getpid() */
#include <time.h> /* for time() */
#include <math.h>
#include <assert.h>
#include <iostream>
#include <ctime>
#include <thrust/scan.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <thrust/extrema.h>
#include <cuda.h>

using namespace std;

// Checks whether the CUDA runtime has recorded an error.
//   s: label printed alongside the error text (defaults to "n/a").
// Returns true and prints the error description when an error is pending,
// false otherwise. cudaGetLastError() also resets the recorded error, so a
// second call right after this one reports success.
bool errorAsk(const char *s="n/a")
{
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        printf("CUDA error [%s]: %s\n", s, cudaGetErrorString(status));
        return true;
    }
    return false;
}

/* Fills the first N entries of c_idata with pseudo-random test values.
 * Each entry is constant * k, where k = floor(drand48()*10), i.e. a whole
 * number from 0 to 9. Nothing is written when N <= 0.
 * Returns c_idata so the call can be used in an assignment. */
double *fillArray(double *c_idata,int N,double constant) {
    for (int idx = 0; idx < N; ++idx) {
        const double digit = floor(drand48()*10);
        c_idata[idx] = constant*digit;
    }
    return c_idata;
}

// Question listing: builds N random doubles on the host, copies them to the
// GPU, and tries to find the minimum with thrust::min_element.
// NOTE: this listing is kept exactly as asked; it does not compile because of
// the assignment to `result` marked below.
int main(int argc,char *argv[])
{
    int N;
    N = 100;

    // c_data: host input, g_data: device copy, result: intended to receive the minimum.
    double *c_data,*g_data,*result;
    result = new double[N];

    c_data = new double[N];
    c_data = fillArray(c_data,N,1);

    // Allocate the device buffer and upload the host data (return codes are not checked here).
    cudaMalloc(&g_data,N*sizeof(double));
    cudaMemcpy(g_data,c_data,N*sizeof(double),cudaMemcpyHostToDevice);
    // Wrap the raw device pointer so it can be passed to Thrust algorithms.
    thrust::device_ptr<double> g_ptr =  thrust::device_pointer_cast(g_data);

    // This is the line the compiler rejects: min_element returns an iterator
    // (here a thrust::device_ptr<double>), which cannot be converted to the
    // raw double* `result`.
    result = thrust::min_element(g_ptr, g_ptr + N); // not sure how to get this to work
//        result = thrust::max_element(c_data, c_data + N); //works but I need to do this on the gpu

    // Copies the device array back over the host array.
    cudaMemcpy(c_data,g_data,N*sizeof(double),cudaMemcpyDeviceToHost);

    cout<<result[0]<<endl;
}

Solution

  • thrust::min_element returns an iterator.

    From the documentation:

    min_element finds the smallest element in the range [first, last). It returns the first iterator i in [first, last) such that no other iterator in [first, last) points to a value smaller than *i.

    An iterator is something like a pointer. It indicates a position of an element in a container. Like pointers, iterators can be added to, subtracted from, etc.

    So we could extract this iterator directly:

    thrust::device_ptr<double> result_position = thrust::min_element(...
    

    or another approach would be to get a relative offset to that position, from the start of the container:

    int result_offset = thrust::min_element(g_ptr, ...) - g_ptr;
    

    This works because iterators (or thrust::device_ptr) can be subtracted. The iterator returned from min_element minus the start of the container will give the offset to the position of the minimum element.

    Here's a worked example based on your code:

    $ cat t957.cu
    #include <stdio.h>
    #include <stdlib.h> /* for rand() */
    #include <iostream>
    #include <thrust/device_ptr.h>
    #include <thrust/extrema.h>
    
    using namespace std;
    
    // Checks whether the CUDA runtime has recorded an error.
    //   s: label printed alongside the error text (defaults to "n/a").
    // Returns true and prints the error description when an error is pending,
    // false otherwise. cudaGetLastError() also resets the recorded error, so a
    // second call right after this one reports success.
    bool errorAsk(const char *s="n/a")
    {
        const cudaError_t status = cudaGetLastError();
        if (status != cudaSuccess) {
            printf("CUDA error [%s]: %s\n", s, cudaGetErrorString(status));
            return true;
        }
        return false;
    }
    
    /* Fills the first N entries of c_idata with pseudo-random test values.
     * Each entry is constant * k, where k = floor(drand48()*10.0), i.e. a whole
     * number from 0 to 9. Nothing is written when N <= 0.
     * Returns c_idata so the call can be used in an assignment. */
    double *fillArray(double *c_idata,int N,double constant) {
        for (int idx = 0; idx < N; ++idx) {
            const double digit = floor(drand48()*10.0);
            c_idata[idx] = constant*digit;
        }
        return c_idata;
    }
    
    // Worked example: finds the minimum of an array that lives in GPU memory
    // with thrust::min_element and prints its position and value.
    // Returns 0 on success, EXIT_FAILURE if the device allocation or upload fails.
    int main(int argc,char *argv[])
    {
        const int N = 100;

        // Host input: N pseudo-random values from fillArray, with a known
        // minimum planted at index 32 so the result is easy to verify.
        double *c_data = new double[N];
        fillArray(c_data,N,1.0);
        c_data[32] = -1.0;

        // Device copy of the input. The return codes are checked so a failed
        // allocation or upload is reported instead of being handed to Thrust.
        double *g_data = NULL;
        if (cudaMalloc(&g_data,N*sizeof(double)) != cudaSuccess) {
            errorAsk("cudaMalloc");
            delete[] c_data;
            return EXIT_FAILURE;
        }
        if (cudaMemcpy(g_data,c_data,N*sizeof(double),cudaMemcpyHostToDevice) != cudaSuccess) {
            errorAsk("cudaMemcpy");
            cudaFree(g_data);
            delete[] c_data;
            return EXIT_FAILURE;
        }

        // Wrap the raw device pointer so it can be passed to Thrust algorithms.
        thrust::device_ptr<double> g_ptr =  thrust::device_pointer_cast(g_data);

        // min_element returns an iterator of the same type as its arguments,
        // not a raw double*. Subtracting the start of the range gives the index.
        thrust::device_ptr<double> min_position = thrust::min_element(g_ptr, g_ptr + N);
        int result_offset = static_cast<int>(min_position - g_ptr);

        // Dereferencing the device_ptr on the host reads that one element back.
        double min_value = *min_position;
        // we could also do this, since c_data still holds the same values:
        // double min_value = c_data[result_offset];
        std::cout<< "min value found at position: " << result_offset << " value: " << min_value << std::endl;

        // Release the device and host buffers.
        cudaFree(g_data);
        delete[] c_data;
        return 0;
    }
    $ nvcc -o t957 t957.cu
    $ ./t957
    min value found at position: 32 value: -1
    $
    

    The Thrust Quick Start Guide gives a brief description of iterators and how they are used in Thrust.