Search code examples
gcccudalinkerg++nvcc

Compiling with nvcc and g++


I'm using Nvidia's nvcc compiler to compile a .cpp file which will contain device code (but only contains host code now). The program I am using is written in C and C++ and uses gcc/g++ to compile with and g++ is used to link the final executable together. Originally, when I run my program using -Ofast, it runs in about 2 seconds, but I factored in nvcc to compile 1 .cpp file and the rest with gcc/g++, have g++ link it together, the program takes about 8 seconds to run. If I compile more .cpp files with nvcc, it slows down even more (~10 seconds). I tried adjusting my make file to compile everything with nvcc but I received too many errors from the C code. Only the C++ code seems to be able to run without errors when compiled with nvcc. Does nvcc not optimise the host code? Why does this happen? What is the best way to structure the program?

Makefile

CC = gcc
CCPP = g++
NVCC = /vol/cuda/8.0.61/bin/nvcc

#preprocessor defines
DEFINES = -DUSE_ZLIB

OPTFLAGS = -Ofast

#CFLAGS = -ansi -pedantic -Wall -Werror -fPIC 
#CFLAGS = -ansi -pedantic -Wall -fPIC 
CFLAGS = -ansi $(OPTFLAGS)
NVFLAGS = -x c++ -Wno-deprecated-gpu-targets -ccbin g++

NVLINKFLAGS = -L/vol/cuda/8.0.61/lib64/ -lcuda -lcudart
#NVLINKFLAGS += -L/vol/cuda/8.0.61/lib -lcuda -lcudart

CPPFLAGS = $(OPTFLAGS) -I./include $(DEFINES)
LINKFLAGS = -lz

all : simple

objs =  src/tourtre.o     \
    src/ctArc.o       \
    src/ctBranch.o    \
    src/ctComponent.o \
    src/ctNode.o      \
    src/ctQueue.o     \
    src/ctNodeMap.o   \
    examples/simple/obj/Data.o \
    examples/simple/obj/Mesh.o \
    examples/simple/obj/main.o

simple : $(objs)
    $(CCPP)  $(CPPFLAGS) -o simple $^ $(LINKFLAGS) $(NVLINKFLAGS)

src/tourtre.o : src/tourtre.c include/tourtre.h src/ctMisc.h include/ctArc.h include/ctNode.h src/ctComponent.h include/ctNode.h src/ctQueue.h src/ctAlloc.h
    $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

src/ctArc.o : src/ctArc.c include/tourtre.h src/ctMisc.h include/ctArc.h 
    $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

src/ctBranch.o : src/ctBranch.c include/tourtre.h src/ctMisc.h include/ctBranch.h 
    $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

src/ctComponent.o : src/ctComponent.c include/tourtre.h src/ctMisc.h src/ctComponent.h 
    $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

src/ctNode.o : src/ctNode.c include/tourtre.h src/ctMisc.h include/ctNode.h 
    $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

src/ctQueue.o : src/ctQueue.c include/tourtre.h src/ctMisc.h src/ctQueue.h 
    $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

src/ctNodeMap.o : src/ctNodeMap.c src/ctNodeMap.h include/ctNode.h src/ctQueue.h src/sglib.h
    $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

examples/simple/obj/Data.o: examples/simple/src/Data.cpp examples/simple/src/Data.h examples/simple/src/Global.h 
    $(CCPP) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

examples/simple/obj/Mesh.o: examples/simple/src/Mesh.cpp examples/simple/src/Data.h examples/simple/src/Global.h examples/simple/src/Mesh.h 
    $(NVCC) $(NVFLAGS) -c $< -o $@

examples/simple/obj/main.o: examples/simple/src/main.cpp examples/simple/src/Data.h examples/simple/src/Global.h examples/simple/src/Mesh.h 
    $(CCPP) $(CPPFLAGS) $(CFLAGS) -c $< -o $@


clean :
    -rm -rf src/*.o examples/simple/obj/*.o doc/html

Solution

  • Add -Xcompiler -ansi -Xcompiler -Ofast to NVFLAGS so that the host compiler gets the same (optimization) flags from nvcc as when run directly.