Search code examples
Tags: cuda, runtime-error, fft, cufft

CuFFT Unknown Error


I have an array of 300,000 points and I want to compute the FFT of every 600-point segment (500 transforms in total). I'm attempting to use cufftPlanMany to do this in one batched call, but I'm getting an unknown error here:

cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500));

retrevialfft.cu(82) : cufftSafeCall() CUFFT error: <unknown>

Here's the code in context

  // Question snippet: allocates a host and a device buffer, builds a batched
  // 1D complex-to-complex cuFFT plan, runs a forward transform in place on the
  // device buffer and copies the result back to the host.
  // NOTE(review): SIGNAL_SIZE and cufftSafeCall are defined outside this snippet.
  // Select device 0 (return code is not checked).
  cudaSetDevice(0);

  // Allocate host memory for the signal (malloc result is not checked)
  cufftComplex* h_signal=(cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);


  // Initialize the memory for the signal: random real part, zero imaginary part
  for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
    h_signal[i].x = rand() / (float)RAND_MAX;
    h_signal[i].y = 0;

    //    printf("Orignal: %f %f \n", h_signal[i].x, h_signal[i].y);
  }




  // Size in bytes of the whole signal, reused for the device allocation and the copy back
  int mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;

  // Allocate device memory for signal (return code is not checked)
  // NOTE(review): no host-to-device copy of h_signal is visible in this snippet,
  // so d_signal appears to be transformed without ever being filled.
  cufftComplex* d_signal;
  cudaMalloc((void**)&d_signal, mem_size);

   int rank = 1; //1d plan
   // NOTE(review): n[] is the length of EACH transform. The question text asks for
   // 600-point FFTs over 300,000 points, so this presumably should be 600 -- confirm.
   int numCols = 300000;
   int n[] = {numCols};

   // NOTE(review): batch and ostride are declared but never used below; the plan
   // call passes the literals 500 and 1 instead.
   int batch = 500;
   int istride = 1;
   int ostride = 1;
   int idist = numCols;

  // CUFFT plan
  // As written this requests 500 transforms of 300,000 points each, with output
  // stride 1 and output distance 1.
  // NOTE(review): that is far more data than a 300,000-point buffer holds (assuming
  // SIGNAL_SIZE is 300,000 -- confirm) and is the likely source of the reported error.
   cufftHandle plan;
   cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500));

  // Transform signal (forward, in place: input and output are both d_signal)
  printf("Transforming signal cufftExecC2C\n");
  cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));



     // Copy device memory to host (malloc and cudaMemcpy results are not checked)
  cufftComplex* h_transformed = (cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);;
  cudaMemcpy(h_transformed, d_signal, mem_size,
                           cudaMemcpyDeviceToHost);



//Destroy CUFFT context (return code is not checked)
  cufftDestroy(plan);

  // cleanup memory
  free(h_signal);

  free(h_transformed);
  cudaFree(d_signal);
  cudaDeviceReset();

Any idea of what the error actually is?


Solution

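  • You did not show any more detail in your question. A likely cause is visible in the call itself: n is set to 300000 and the batch to 500, so the plan describes 500 transforms of 300,000 points each rather than 500 transforms of 600 points each. Below, I'm providing a complete working example that uses cufftPlanMany() to execute batched 1D FFTs. I hope it helps.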

    #include <stdio.h>
    #include <stdlib.h>
    #include <cufft.h>
    #include <assert.h>
    
    /********************/
    /* CUDA ERROR CHECK */
    /********************/
    // Wraps a CUDA runtime call and reports failures with the call site's file and line.
    // The do { } while (0) form makes the macro behave as a single statement, so it is
    // safe inside un-braced if/else bodies.
    #define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

    // Prints a diagnostic to stderr when `code` is not cudaSuccess.
    //   code  - status returned by a CUDA runtime call
    //   file  - source file of the call site (const: it receives the __FILE__ string literal)
    //   line  - source line of the call site
    //   abort - when true (the default), waits for a key press and then exits the
    //           process with `code` as the exit status
    inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
    {
        if (code == cudaSuccess) return;

        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) {
            getchar();   // keep the console open until the user acknowledges the error
            exit(code);
        }
    }
    
    /*********************/
    /* CUFFT ERROR CHECK */
    /*********************/
    // Maps a cufftResult status code to its symbolic name for diagnostics.
    // Returns "<unknown>" for any value that is not recognised.
    static const char *_cudaGetErrorEnum(cufftResult error)
    {
        // Result codes whose enumerators this file already relies on.
        switch (error)
        {
            case CUFFT_SUCCESS:
                return "CUFFT_SUCCESS";

            case CUFFT_INVALID_PLAN:
                return "CUFFT_INVALID_PLAN";

            case CUFFT_ALLOC_FAILED:
                return "CUFFT_ALLOC_FAILED";

            case CUFFT_INVALID_TYPE:
                return "CUFFT_INVALID_TYPE";

            case CUFFT_INVALID_VALUE:
                return "CUFFT_INVALID_VALUE";

            case CUFFT_INTERNAL_ERROR:
                return "CUFFT_INTERNAL_ERROR";

            case CUFFT_EXEC_FAILED:
                return "CUFFT_EXEC_FAILED";

            case CUFFT_SETUP_FAILED:
                return "CUFFT_SETUP_FAILED";

            case CUFFT_INVALID_SIZE:
                return "CUFFT_INVALID_SIZE";

            case CUFFT_UNALIGNED_DATA:
                return "CUFFT_UNALIGNED_DATA";

            default:
                break;
        }

        // Result codes added in later cuFFT releases. They are matched by numeric
        // value so this helper still compiles against toolkits whose cufft.h does
        // not declare the corresponding enumerators.
        // NOTE(review): values taken from the cufftResult listing in the cuFFT
        // documentation -- verify against the cufft.h of the toolkit in use.
        switch ((int)error)
        {
            case 0x0A:
                return "CUFFT_INCOMPLETE_PARAMETER_LIST";

            case 0x0B:
                return "CUFFT_INVALID_DEVICE";

            case 0x0C:
                return "CUFFT_PARSE_ERROR";

            case 0x0D:
                return "CUFFT_NO_WORKSPACE";

            case 0x0E:
                return "CUFFT_NOT_IMPLEMENTED";

            case 0x0F:
                return "CUFFT_LICENSE_ERROR";

            case 0x10:
                return "CUFFT_NOT_SUPPORTED";

            default:
                break;
        }

        return "<unknown>";
    }
    
    // Wraps a cuFFT call and aborts with a diagnostic if it did not return CUFFT_SUCCESS.
    #define cufftSafeCall(err)      __cufftSafeCall(err, __FILE__, __LINE__)

    // Checks a cuFFT status code.
    //   err  - status returned by a cuFFT call
    //   file - source file of the call site (supplied by the cufftSafeCall macro)
    //   line - source line of the call site (supplied by the cufftSafeCall macro)
    // On failure: prints the call site, the numeric code and its symbolic name to
    // stderr, resets the device and terminates the process.
    inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
    {
        if (CUFFT_SUCCESS != err) {
            // Report the caller's location (file/line parameters), not this helper's,
            // and keep the conversions in step with the arguments.
            fprintf(stderr, "CUFFT error in file '%s', line %d\nerror %d: %s\nterminating!\n",
                    file, line, (int)err, _cudaGetErrorEnum(err));
            cudaDeviceReset();
            assert(0);
            // assert() compiles to nothing under NDEBUG; make sure we really terminate.
            exit(EXIT_FAILURE);
        }
    }
    
    /********/
    /* MAIN */
    /********/
    // Runs `batch` independent 1D complex-to-complex forward FFTs of `numCols`
    // points each with a single cufftPlanMany plan, in place on the device, and
    // prints the transformed samples.
    // Returns 0 on success, EXIT_FAILURE if the host buffer cannot be allocated;
    // CUDA / cuFFT failures terminate inside gpuErrchk / cufftSafeCall.
    int main() {

        const int batch = 3;                          // --- How many transforms to be performed
        const int numCols = 16;                       // --- Size of each transform

        const int SIGNAL_SIZE = batch * numCols;      // --- Overall size for all the signals
        const size_t mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;

        // --- Allocate host memory for all the signals
        cufftComplex* h_signal = (cufftComplex*)malloc(mem_size);
        if (h_signal == NULL) {
            fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)mem_size);
            return EXIT_FAILURE;
        }

        // --- Initialize host memory for all the signals (constant real input)
        for (int i = 0; i < SIGNAL_SIZE; ++i) {
            h_signal[i].x = 1.f;
            h_signal[i].y = 0.f;
        }

        // --- Allocate device memory for all the signals
        cufftComplex* d_signal = NULL;
        gpuErrchk(cudaMalloc((void**)&d_signal, mem_size));

        // --- Host to Device memcopy
        gpuErrchk(cudaMemcpy(d_signal, h_signal, mem_size, cudaMemcpyHostToDevice));

        int rank = 1; // --- 1d plan
        int n[] = {numCols};

        // --- Signals are stored back to back: unit stride, numCols elements apart
        int istride = 1;
        int ostride = 1;
        int idist = numCols;
        int odist = numCols;

        // --- CUFFT plan
        // The batch count must match the number of signals actually held in
        // d_signal; a larger value makes the transform run past the allocation.
        cufftHandle plan;
        cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, ostride, odist, CUFFT_C2C, batch));

        // --- Signals transformations (in place: input and output are both d_signal)
        cufftSafeCall(cufftExecC2C(plan, (cufftComplex*)d_signal, (cufftComplex*)d_signal, CUFFT_FORWARD));

        // --- Device to Host memcopy
        gpuErrchk(cudaMemcpy(h_signal, d_signal, mem_size, cudaMemcpyDeviceToHost));

        for (int i = 0; i < SIGNAL_SIZE; ++i) printf("Real part = %f; Imaginar part = %f\n", h_signal[i].x, h_signal[i].y);

        // --- Destroy CUFFT context
        cufftSafeCall(cufftDestroy(plan));

        // --- Memory cleanup
        free(h_signal);
        gpuErrchk(cudaFree(d_signal));

        gpuErrchk(cudaDeviceReset());

        return 0;
    }