cuBLAS cublasSgemv "Segmentation fault"

I get a segmentation fault when calling cublasSgemv. My GPU is a K20Xm. Here is my code.

// Snippet from the question, kept exactly as posted. `rows`, `cols` and
// `handle` are not defined in this excerpt.
float *a, *x, *y;
int NUM_VEC = 8;
// Host buffers: y has rows * NUM_VEC floats, a has rows * cols floats,
// x has cols * NUM_VEC floats.
y = (float*)malloc(sizeof(float) * rows * NUM_VEC);
a = (float*)malloc(sizeof(float) * rows * cols);
x = (float*)malloc(sizeof(float) * cols * NUM_VEC);
// Only a and x are filled; y is never written in this snippet.
get_mat_random(a, rows, cols);
get_vec_random(x, cols * NUM_VEC);

// Device counterparts of a, x and y.
float *d_a = 0;
float *d_x = 0;
float *d_y = 0;

// NOTE: as posted, each of the three cudaMalloc calls below is missing its
// closing parenthesis, so this excerpt does not compile verbatim.
cudaMalloc((void **)&d_a, rows * cols * sizeof(float);
cudaMalloc((void **)&d_x, cols * NUM_VEC * sizeof(float);
cudaMalloc((void **)&d_y, rows * NUM_VEC * sizeof(float);
// Copy the host buffers to the device (y is copied although it was never filled).
cublasSetVector(rows * cols, sizeof(float), a, 1, d_a, 1);
cublasSetVector(NUM_VEC * cols, sizeof(float), x, 1, d_x, 1);
cublasSetVector(NUM_VEC * rows, sizeof(float), y, 1, d_y, 1);
float alpha = 1.0f;
// One cublasSgemv call per vector, offsetting into d_x and d_y.
for (int i = 0; i < NUM_VEC; i++) {
  // The literal `0` after the x increment sits in the beta position, i.e. a
  // null pointer is passed for beta; the answer identifies this as the cause
  // of the segmentation fault.
  cublasSgemv(handle, CUBLAS_OP_T, cols, rows, &alpha, d_a, rows, d_x + i * cols, 1,0, d_y + i * rows, 1);
}

Solution

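In my limited testing, the segmentation fault occurs because the beta parameter of cublasSgemv must not be NULL; the call in the question passes a literal 0 in that position. beta has to point to a valid float, located in host or device memory according to the pointer mode selected with cublasSetPointerMode. Below is the code I used to reproduce and fix the error.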

#include <cstdio>
#include <iostream>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cstdlib>

using namespace std;

// Fills the first `count` entries of `a` with rand() scaled by RAND_MAX,
// i.e. values in [0, 1]. Does nothing when `count` is not positive.
void get_vec_random(float* a, int count)
{
    const float divisor = float(RAND_MAX);
    float* out = a;
    for (int left = count; left > 0; --left)
    {
        *out = rand() / divisor;
        ++out;
    }
}

// Fills a rows x cols matrix stored contiguously in `a` with random values
// by treating it as one flat vector of rows * cols elements.
void get_mat_random(float* a, int rows, int cols)
{
    const int element_count = rows * cols;
    get_vec_random(a, element_count);
}

// Prints a message and exits when a CUDA runtime call did not succeed.
static void check_cuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Prints a message and exits when a cuBLAS call did not succeed.
static void check_cublas(cublasStatus_t status, const char* what)
{
    if (status != CUBLAS_STATUS_SUCCESS) {
        fprintf(stderr, "cuBLAS error in %s: status %d\n", what, (int)status);
        exit(EXIT_FAILURE);
    }
}

// Reproduces the question's setup with the fix applied: multiplies one
// random matrix with NUM_VEC random vectors, one cublasSgemv call per vector,
// passing valid host pointers for both alpha and beta.
// Returns 0 on success; exits with EXIT_FAILURE on any allocation, CUDA or
// cuBLAS error.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    const int rows = 10, cols = 10;
    const int NUM_VEC = 8;

    cublasHandle_t handle;
    check_cublas(cublasCreate(&handle), "cublasCreate");

    // Host buffers: matrix a, NUM_VEC input vectors x, NUM_VEC output vectors y.
    float* a = (float*)malloc(sizeof(float) * rows * cols);
    float* x = (float*)malloc(sizeof(float) * cols * NUM_VEC);
    float* y = (float*)malloc(sizeof(float) * rows * NUM_VEC);
    if (a == NULL || x == NULL || y == NULL) {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    get_mat_random(a, rows, cols);
    get_vec_random(x, cols * NUM_VEC);

    float* d_a = 0;
    float* d_x = 0;
    float* d_y = 0;

    check_cuda(cudaMalloc((void**)&d_a, rows * cols * sizeof(float)), "cudaMalloc(d_a)");
    check_cuda(cudaMalloc((void**)&d_x, cols * NUM_VEC * sizeof(float)), "cudaMalloc(d_x)");
    check_cuda(cudaMalloc((void**)&d_y, rows * NUM_VEC * sizeof(float)), "cudaMalloc(d_y)");

    check_cublas(cublasSetVector(rows * cols, sizeof(float), a, 1, d_a, 1), "cublasSetVector(a)");
    check_cublas(cublasSetVector(NUM_VEC * cols, sizeof(float), x, 1, d_x, 1), "cublasSetVector(x)");
    // y is output only: with beta == 0 cuBLAS does not read it, so the
    // uninitialised host buffer is deliberately not uploaded.

    // beta must be a valid pointer (never NULL); its value 0 means the old
    // contents of y do not contribute to the result.
    const float alpha = 1.0f, beta = 0.0f;

    // alpha and beta live in host memory, so select host pointer mode.
    check_cublas(cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST), "cublasSetPointerMode");
    for (int i = 0; i < NUM_VEC; i++)
    {
        // The matrix is declared to cuBLAS as m = cols by n = rows
        // (column-major), so its leading dimension must be at least cols.
        // Reading the host buffer as a row-major rows x cols matrix, the
        // transposed product yields a vector of `rows` elements.
        check_cublas(cublasSgemv(handle,
                                 CUBLAS_OP_T,
                                 cols,
                                 rows,
                                 &alpha,
                                 d_a,
                                 cols,
                                 d_x + i * cols,
                                 1,
                                 &beta,
                                 d_y + i * rows,
                                 1),
                     "cublasSgemv");
    }

    // Copy the results back to the host; this also surfaces any error raised
    // while the gemv calls were executing.
    check_cublas(cublasGetVector(NUM_VEC * rows, sizeof(float), d_y, 1, y, 1), "cublasGetVector(y)");

    // Release device memory, the cuBLAS handle and the host buffers.
    check_cuda(cudaFree(d_a), "cudaFree(d_a)");
    check_cuda(cudaFree(d_x), "cudaFree(d_x)");
    check_cuda(cudaFree(d_y), "cudaFree(d_y)");
    check_cublas(cublasDestroy(handle), "cublasDestroy");

    free(a);
    free(x);
    free(y);
    return 0;
}

Hope this solves the problem.