Search code examples
cmake cuda ros nvcc

nvcc Intermediate Link failure


I've been struggling to fix an intermediate linking error that I get when using CMake to build my nvcc project. I've been upgrading an existing library to use CUDA, and I can successfully call its functions from host code. When I try to call functions of that library from device code, however, I get the intermediate linking error. I have annotated all of the library's functions with both the __host__ and __device__ qualifiers.

As a side note, this is a ROS project, so I'm using some of the catkin CMake functions.

This is a snippet from the ParticleFilter code that calls the host and device functions:

#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <curand_kernel.h>
#include <iostream>
#include <davinci_kinematics_cuda/davinci_fwd_kinematics.cuh>

__host__
ParticleFilter::ParticleFilter(const unsigned int numParticles, const std::vector<double> &initialJointState, const unsigned int threads,
        const unsigned int blocks) {

    /* random other work here */
    
    // This works fine (compiles and runs), it is calling host code from the other file
    kinematics = davinci_kinematics_cuda::Forward();
    std::cout << kinematics.fwd_kin_solve(initialJointState.data()).translation() << std::endl;
}

__global__
void printParticlesKernel(double *particles, const unsigned int numParticles, const unsigned int dimensions, const size_t pitch) {
    int locationStart = blockIdx.x * blockDim.x + threadIdx.x;
    int stride = blockDim.x * gridDim.x;

    // This fails, will not link
    davinci_kinematics_cuda::Forward kinematics = davinci_kinematics_cuda::Forward(); 

    for (int n = locationStart; n < numParticles; n += stride) {
        double *particle = (double*) ((char*) particles + n * pitch);
        
        /* random other work here */

        // this fails, will not link
        auto translation = kinematics.fwd_kin_solve(particle).translation();
        printf("%f %f %f\n", translation[0], translation[1], translation[2]);
    }
}

And this is from the kinematics file:

#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

namespace davinci_kinematics_cuda {

    // use member fncs to compute and multiply successive transforms
    __host__ __device__
    Forward::Forward() {
        /* random initialization here */
    }

    __host__ __device__
    Eigen::Affine3d Forward::fwd_kin_solve(const double *q_vec, const unsigned int desired_joint) {
        /* other work here */
    }
}

This is the relevant CMake parts for the ParticleFilter file.

cmake_minimum_required(VERSION 2.8.10)
project(tool_tracking LANGUAGES CUDA CXX)

# https://stackoverflow.com/questions/25748039/add-cuda-to-ros-package
find_package(CUDA REQUIRED) 

# set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
set(CUDA_SEPARABLE_COMPILATION ON)

find_package(catkin REQUIRED COMPONENTS
    message_generation
    roscpp
    std_msgs
    sensor_msgs
    geometry_msgs
    cwru_opencv_common
    tool_model
    cwru_davinci_control
    cwru_davinci_kinematics
    xform_utils
    tf
    tool_segmentation
)


catkin_package(
    INCLUDE_DIRS
        include
    LIBRARIES 
        tool_tracking_particle
    CATKIN_DEPENDS
        message_runtime
        std_msgs
        sensor_msgs
        geometry_msgs
        cwru_opencv_common
        tool_model
        cwru_davinci_control
        cwru_davinci_kinematics
        xform_utils
        tf
)

include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
include_directories(include ${catkin_INCLUDE_DIRS} tool_model_lib )

cuda_add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
target_link_libraries(test_particlefilter tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES})

This is the error from the build (it is reported by nvlink during the intermediate link step):

/usr/bin/cmake -H/home/ethan/catkin_ws/src/cwru_davinci_tool_tracking/tool_tracking -B/home/ethan/catkin_ws/build/tool_tracking --check-build-system CMakeFiles/Makefile.cmake 0
/usr/bin/cmake -E cmake_progress_start /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/progress.marks
/usr/bin/make -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/home/ethan/catkin_ws/build/tool_tracking'
/usr/bin/make -f CMakeFiles/test_particlefilter.dir/build.make CMakeFiles/test_particlefilter.dir/depend
make[2]: Entering directory '/home/ethan/catkin_ws/build/tool_tracking'
[ 20%] Building NVCC intermediate link file CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o
/usr/local/cuda-11.0/bin/nvcc -lcudadevrt -m64 -ccbin /usr/bin/cc -dlink /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_Particle.cu.o -o /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/./test_particlefilter_intermediate_link.o -Xcompiler -fPIC
nvlink error   : Undefined reference to '_ZN23davinci_kinematics_cuda7ForwardC1Ev' in '/home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o'
nvlink error   : Undefined reference to '_ZN23davinci_kinematics_cuda7Forward13fwd_kin_solveEPKdj' in '/home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o'
CMakeFiles/test_particlefilter.dir/build.make:1468: recipe for target 'CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o' failed
make[2]: Leaving directory '/home/ethan/catkin_ws/build/tool_tracking'
make[2]: *** [CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o] Error 255
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/test_particlefilter.dir/all' failed
make[1]: Leaving directory '/home/ethan/catkin_ws/build/tool_tracking'
make[1]: *** [CMakeFiles/test_particlefilter.dir/all] Error 2
Makefile:140: recipe for target 'all' failed
make: *** [all] Error 2

How do I fix the undefined reference errors? It looks like a linking problem, but I am not familiar enough with the compilation and linking process to troubleshoot any further. I can also post the CMake code from the kinematics package if needed.


Solution

  • Here's the key issue and the part that will be most helpful to other readers of this question. Catkin configures CMake to build shared libraries by default but CUDA separable compilation and nvlink only work with static libraries. You need to set your CUDA libraries (in your case, those in cwru_davinci_kinematics) to be static, always. You can do that by adding the STATIC keyword to the add_library call, as in:

    add_library(my_cuda_lib STATIC source1.cu ...)
    

    If you "link" CUDA code to a shared library in CMake, the device linker (nvlink) will silently ignore that library, so any device symbols it defines remain unresolved. This is actually the documented behavior of nvcc. See here: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#libraries

    The device linker has the ability to read the static host library formats (.a on Linux and Mac OS X, .lib on Windows). It ignores any dynamic (.so or .dll) libraries.


    Another major lesson here is that setting CMake to an ancient version is bound to cause problems. While reproducing your issue, I was forced to build OpenCV 3 from source (it's not in Ubuntu 20.04 LTS) and there is no good way to override the search path for a particular package prior to version 3.12, which introduced CMP0074.

    Upgrade your minimum CMake version. Ideally you would upgrade to the newest version available to you in your software repositories and set your files' minimums to that. There is zero benefit to being compatible with CMake versions earlier than ~3.5, and I would argue that extends up to 3.16 (the version in Ubuntu 20.04 LTS). Since you're using CUDA, 3.18 is most appropriate. Even worse, many of your projects set a minimum below 2.8.12; compatibility with this version will very soon be removed by CMake.


    Here are the exact changes I made to get it to build on Ubuntu 20.04 LTS. I used the following build script, placed in and executed from the ROS workspace:

    #!/usr/bin/bash
    
    source /opt/ros/noetic/setup.bash
    
    export CUDACXX=/usr/local/cuda/bin/nvcc
    export OpenCV_ROOT=$(readlink -f opencv-install)
    
    [ -f "$CUDACXX" ] || { echo "Invalid CUDACXX: $CUDACXX"; exit; }
    [ -d "$OpenCV_ROOT" ] || { echo "Invalid OpenCV_ROOT: $OpenCV_ROOT"; exit; }
    
    rm -rf build devel
    catkin build tool_tracking --cmake-args \
        -Wno-dev \
        -DCMAKE_POLICY_DEFAULT_CMP0074=NEW \
        -DCMAKE_CUDA_ARCHITECTURES=75
    

    The directory opencv-install was created by building my own OpenCV 3 (because Ubuntu 20.04 only has v4). The steps for that were:

    $ git clone -b 3.4.14 git@github.com:opencv/opencv.git
    $ git clone -b 3.4.14 git@github.com:opencv/opencv_contrib.git
    $ cmake -G Ninja -S opencv -B opencv-build/ -DOPENCV_EXTRA_MODULES_PATH=$(readlink -f opencv_contrib)/modules -DBUILD_opencv_cnn_3dobj=OFF -DBUILD_opencv_face=OFF -DBUILD_opencv_hdf=OFF -DBUILD_opencv_hfs=OFF -DBUILD_opencv_julia=OFF -DBUILD_opencv_matlab=OFF -DBUILD_opencv_ovis=OFF -DBUILD_opencv_reg=OFF -DBUILD_opencv_sfm=OFF -DBUILD_opencv_text=OFF -DBUILD_opencv_wechat_qrcode=OFF -DBUILD_opencv_ximgproc=OFF
    $ cmake --build opencv-build
    $ cmake --install opencv-build --prefix opencv-install
    

    This disables the extra modules that have significant/irrelevant dependencies.

    The script sets the environment variable OpenCV_ROOT to direct CMake to this locally installed version of OpenCV. Because the minimum version of CMake specified in the file is so low, I must also set CMAKE_POLICY_DEFAULT_CMP0074=NEW so that OpenCV_ROOT will be honored.

    Here are the changes I made to your CMake code:

    src/cwru_davinci_kinematics/CMakeLists.txt

    --- a/src/cwru_davinci_kinematics/CMakeLists.txt
    +++ b/src/cwru_davinci_kinematics/CMakeLists.txt
    @@ -1,4 +1,4 @@
    -cmake_minimum_required(VERSION 2.8.10)
    +cmake_minimum_required(VERSION 3.18)
     project(cwru_davinci_kinematics)
     
     #This is needed as part of the migration to ros jade and later
    @@ -26,18 +26,16 @@ find_package(catkin REQUIRED COMPONENTS roscpp roslib roslint tf tf2 tf2_eigen)
     
     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=gnu++0x")
     
    -# https://stackoverflow.com/questions/25748039/add-cuda-to-ros-package
    -find_package(CUDA) 
    -message(STATUS "CUDA_FOUND=${CUDA_FOUND}")
    -if(CUDA_FOUND)
    -       message(STATUS "Found CUDA, setting nvcc compilation flags")
    -       
    -       # set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS         
    -       set(CUDA_NVCC_FLAGS CACHE STRING "nvcc flags" FORCE)
    -       set(CUDA_VERBOSE_BUILD ON CACHE BOOL "nvcc verbose" FORCE)
    +include(CheckLanguage)
    +check_language(CUDA)
    +if (CMAKE_CUDA_COMPILER)
    +  enable_language(CUDA)
    +
            # fPIC fixes some linker issues with nvcc code / objects
    -       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
    -       set(CUDA_SEPARABLE_COMPILATION ON)
    +       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
    +       set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
    +
    +  find_package(CUDAToolkit REQUIRED)
     endif()
     
     include_directories(
    @@ -48,7 +46,7 @@ include_directories(
       ${YAML_CPP_INCLUDE_DIRS}
     )
     
    -if (CUDA_FOUND)
    +if (CMAKE_CUDA_COMPILER)
            catkin_package(
                    DEPENDS ${Eigen3_DEP}
                    LIBRARIES 
    @@ -82,14 +80,17 @@ target_link_libraries(davinci_kinematics
       davinci_kinematic_definitions
       )
     
    -if (CUDA_FOUND)
    -       cuda_add_library(davinci_kinematics_cuda src/davinci_fwd_kinematics.cu)
    -       cuda_add_library(davinci_kinematics_definitions_cuda src/davinci_kinematic_definitions.cu)
    -       
    -       target_link_libraries(davinci_kinematics_cuda
    -               ${catkin_LIBRARIES}
    -               davinci_kinematics_definitions_cuda
    -       )
    +if (CMAKE_CUDA_COMPILER)
    +  add_library(davinci_kinematics_cuda STATIC src/davinci_fwd_kinematics.cu)
    +  add_library(davinci_kinematics_definitions_cuda STATIC src/davinci_kinematic_definitions.cu)
    +
    +  target_link_libraries(
    +    davinci_kinematics_cuda
    +    PRIVATE
    +      CUDA::curand
    +      ${catkin_LIBRARIES}
    +      davinci_kinematics_definitions_cuda
    +  )
     endif()
     
     # Examples
    

    The important lines here are:

      add_library(davinci_kinematics_cuda STATIC src/davinci_fwd_kinematics.cu)
      add_library(davinci_kinematics_definitions_cuda STATIC src/davinci_kinematic_definitions.cu)
    

    I also modernized the CMake code here, because the built-in CUDA language support has considerably advanced.

    src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt

    --- a/src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt
    +++ b/src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt
    @@ -1,18 +1,11 @@
    -cmake_minimum_required(VERSION 2.8.10)
    -project(tool_tracking LANGUAGES CUDA CXX)
    +cmake_minimum_required(VERSION 3.18)
    +project(tool_tracking LANGUAGES C CXX CUDA)
     
    -# https://stackoverflow.com/questions/25748039/add-cuda-to-ros-package
    -find_package(CUDA REQUIRED) 
    +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
    +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
     
    -# set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS
    -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
    -set(CUDA_SEPARABLE_COMPILATION ON)
    +find_package(OpenCV 3 REQUIRED)
     
    -#find_package(catkin_simple REQUIRED)
    -## Find catkin macros and libraries
    -## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
    -## is used, also find other catkin packages
    -find_package(OpenCV REQUIRED)
     find_package(catkin REQUIRED COMPONENTS
            message_generation
            roscpp
    @@ -28,11 +21,12 @@ find_package(catkin REQUIRED COMPONENTS
            tool_segmentation
     )
     
    +find_package(CUDAToolkit REQUIRED)
     
     catkin_package(
            INCLUDE_DIRS
                    include
    -       LIBRARIES 
    +       LIBRARIES
                    tool_tracking_particle
            CATKIN_DEPENDS
                    message_runtime
    @@ -47,13 +41,7 @@ catkin_package(
                    tf
     )
     
    -include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
    -include_directories(include ${catkin_INCLUDE_DIRS} tool_model_lib )
    -
    -#cuda_add_library(tool_tracking_particle src/ParticleFilter.cu src/Particle.cu)
    -#add_executable(particle src/tracking_particle.cpp)
    -#target_link_libraries(particle tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} davinci_kinematics_cuda 
    -#                      davinci_kinematics_definitions_cuda)
    -
    -cuda_add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
    -target_link_libraries(test_particlefilter tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES})
    +add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
    +target_include_directories(test_particlefilter SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${catkin_INCLUDE_DIRS})
    +target_include_directories(test_particlefilter PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
    +target_link_libraries(test_particlefilter PRIVATE ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} CUDA::curand)
    

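    As with the kinematics package, I modernized the CMake code here, because the built-in CUDA language support has considerably advanced: the find_package(CUDA) / cuda_add_executable calls are replaced with first-class CUDA language support, a plain add_executable, find_package(CUDAToolkit), and target-scoped include and link commands.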

    Miscellaneous changes

    I bumped the minimum CMake version from 2.8.x to 3.0.2 in all other places to suppress warnings. I also added a version number 3 to all find_package(OpenCV ...) calls that didn't have it.

    Boost no longer has a python3 component; it's just python now. I made the following change to src/vision_opencv/cv_bridge/CMakeLists.txt:

    --- a/src/vision_opencv/cv_bridge/CMakeLists.txt
    +++ b/src/vision_opencv/cv_bridge/CMakeLists.txt
    @@ -1,18 +1,15 @@
    -cmake_minimum_required(VERSION 2.8)
    +cmake_minimum_required(VERSION 3.0.2)
     project(cv_bridge)
     
     find_package(catkin REQUIRED COMPONENTS rosconsole sensor_msgs)
     
     if(NOT ANDROID)
       find_package(PythonLibs)
    -  if(PYTHONLIBS_VERSION_STRING VERSION_LESS 3)
    -    find_package(Boost REQUIRED python)
    -  else()
    -    find_package(Boost REQUIRED python3)
    -  endif()
    +  find_package(Boost REQUIRED python)
     else()
    -find_package(Boost REQUIRED)
    +  find_package(Boost REQUIRED)
     endif()
    +
     find_package(OpenCV 3 REQUIRED
       COMPONENTS
         opencv_core