Search code examples
compiler-errors, cuda, nvidia, nvcc

Using __shfl_xor in my CUDA kernel but getting error when compiling


I am trying to use __shfl_xor in my kernel, but when I compile it I get the error: error: identifier "__shfl_xor" is undefined. I am aware that the flag -gencode arch=compute_30,code=sm_30 has to be set in order to use it, but I have already added that to my CMakeLists.txt.

Here is my kernel:

// Minimal reproduction kernel: it declares two local ints and calls the
// warp-shuffle intrinsic __shfl_xor on them. The value returned by the
// intrinsic is discarded, so the kernel has no observable output.
__global__ void dummy_kernel()
{
    int x = 5;
    int y = 10;
    // This is the call nvcc rejects with: identifier "__shfl_xor" is undefined.
    // NOTE(review): per the question, the intrinsic needs an sm_30-or-newer
    // -gencode target to reach nvcc -- confirm the flag is on the compile line.
    __shfl_xor(x, y);
}

Here is the output from the compiler:

/filepath/kernel_file.cu(13): error: identifier "__shfl_xor" is undefined

Here is what my CMakeLists.txt looks like:

# Build script for the CasHashing3D executable, as posted in the question.
# NOTE(review): this listing contains no project() call.
cmake_minimum_required(VERSION 3.1)

# Fall back to a Release build when no build type was chosen; the value is
# written to the cache with FORCE.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()

# Locate the CUDA toolkit; this is what makes cuda_add_executable() available.
find_package(CUDA REQUIRED)

# NOTE(review): the target is created here, BEFORE CUDA_NVCC_FLAGS is extended
# below. FindCUDA appears to capture CUDA_NVCC_FLAGS at the moment
# cuda_add_executable() is called, so the -gencode options appended afterwards
# presumably never reach nvcc -- confirm against the generated build commands.
cuda_add_executable(CasHashing3D
    MatchPairGPU.cu
)
# Append -gencode options for compute capability 3.0 and 3.5 to the nvcc flags.
# Each option is appended as space-separated text inside one quoted string.
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_30,code=sm_30")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_35,code=sm_35")
# Require C++11 for the target's C++ sources.
set_property(TARGET CasHashing3D PROPERTY CXX_STANDARD 11)
set_property(TARGET CasHashing3D PROPERTY CXX_STANDARD_REQUIRED ON)

# Generate job.sh from the job.sh.in template; @ONLY limits substitution to
# @VAR@ references so ${...} in the shell script is left untouched.
configure_file(job.sh.in job.sh @ONLY)

The makefile generated by CMake is too large to paste in the question so here is a link to the file.


Solution

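  • I changed my CMakeLists.txt to the following and it worked, although at the time I did not know why. The most likely explanation is ordering: the FindCUDA module reads CUDA_NVCC_FLAGS when cuda_add_executable() is called, so the -gencode flags have to be set before that call (as they are below) rather than after it (as in the question). I will update the answer once I have confirmed what I was doing wrong.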

    # Build script for the CasHashing3D executable.
    #
    # cmake_minimum_required() must be the first command so that policy
    # defaults are in place before anything else runs; project() follows it.
    # project() intentionally enables the default languages (C and CXX):
    # FindCUDA's default host compiler is the C compiler.
    cmake_minimum_required(VERSION 3.2)
    project(CasHashing3D)

    # FindCUDA provides cuda_add_executable() and consumes CUDA_NVCC_FLAGS.
    find_package(CUDA REQUIRED)

    # Thread support comes from the Threads::Threads imported target instead of
    # raw "-pthread -lpthread" in CMAKE_CXX_FLAGS (-lpthread is a linker input
    # and does not belong in compile flags). THREADS_PREFER_PTHREAD_FLAG keeps
    # the -pthread compile/link flag where the toolchain supports it.
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    find_package(Threads REQUIRED)

    # Options passed to nvcc, kept as a CMake list (one element per argument).
    # They must be set BEFORE cuda_add_executable(): FindCUDA reads
    # CUDA_NVCC_FLAGS when the target is created, so later changes are ignored.
    list(APPEND CUDA_NVCC_FLAGS
        -O3
        -gencode arch=compute_35,code=sm_35
    )

    cuda_add_executable(CasHashing3D
        Main.cc
    )

    # Plain (keyword-less) signature on purpose: by default FindCUDA links the
    # CUDA runtime with the plain signature, and CMake does not allow mixing
    # plain and PRIVATE/PUBLIC signatures on the same target.
    target_link_libraries(CasHashing3D Threads::Threads)

    # C++11 is requested through target properties, which replaces the
    # hand-written -std=c++11 in CMAKE_CXX_FLAGS.
    # NOTE(review): these properties apply to C++ sources only; if .cu files are
    # added and need C++11, append -std=c++11 to CUDA_NVCC_FLAGS above.
    set_property(TARGET CasHashing3D PROPERTY CXX_STANDARD 11)
    set_property(TARGET CasHashing3D PROPERTY CXX_STANDARD_REQUIRED ON)

    # Generate job.sh (in the build directory) from the job.sh.in template;
    # @ONLY restricts substitution to @VAR@ so shell ${...} is left alone.
    configure_file(job.sh.in job.sh @ONLY)