CUDA 8.0 nvcc fatal: A single input file is required for a non-link phase when an outputfile is specified

I'm trying to link object files in my CUDA project. Below is my makefile:

# Location of the CUDA toolkit and of the nvcc driver inside it.
CUDA_PATH := /usr/local/cuda
NVCC := $(CUDA_PATH)/bin/nvcc

# A single flag set shared by the compile rules and by the link rule.
# NOTE(review): --device-c marks a compile-only step, so handing it to the
# link command is what triggers the "non-link phase" error quoted further down.
NVCCFLAGS := -arch=sm_37 --device-c -std=c++11 -cudart=shared -rdc=true

# Libraries requested by name on the link line.
LIBS := -lcutil -lcudpp -lcuda -lcudart -lcurand

# Every shared object in the toolkit's lib64 directory. The glob is stored
# literally here; it is the shell that expands it when the link recipe runs.
LIBPATH := $(CUDA_PATH)/lib64
SOLIBS := $(LIBPATH)/*.so

# Object files that are linked into the mpamp executable.
OBJS := main.o mtx.o mpamp_for_loop_funs.o cuBLAS_funs.o sparsify_threshold.o

########################################################################

# link
# `all` names a command, not a file this rule creates, so declare it phony;
# otherwise a stray file called `all` would stop the link from ever running.
.PHONY: all

# The prerequisite list must be spelled $(OBJS) exactly: make variable names
# are case-sensitive, and an unknown name such as $(OBJs) silently expands to
# nothing, which would leave the objects out of the dependency graph.
all: $(OBJS)
	$(NVCC) $(NVCCFLAGS) $(OBJS) $(SOLIBS) $(LIBS) -o mpamp
#--output-file mpamp.o

########################################################################

# compile individually
# One static pattern rule covers every object listed in $(OBJS): each NAME.o
# is built from NAME.cu and is rebuilt whenever header.h changes.
#   $<  the first prerequisite (the .cu source)
#   $@  the target (the .o file)
$(OBJS): %.o: %.cu header.h
	$(NVCC) $(NVCCFLAGS) -c $< -o $@

########################################################################

# Neither target names a file that its recipe creates.
.PHONY: run clean

# Run the program after (re)linking it through the `all` target. The makefile
# shown has no rule for a `build` target, so depending on it made `make run`
# fail before anything was executed.
# $(EXEC) is not set in the makefile shown; unless it is supplied from outside
# it expands to nothing and ./mpamp is run directly.
run: all
	$(EXEC) ./mpamp

# $(RM) is make's predefined `rm -f`, so cleaning an already-clean tree does
# not fail on missing files.
clean:
	$(RM) *.o *~ mpamp

########################################################################

I have tried removing $(SOLIBS), but it returns the same error:

ece$ make all
/usr/local/cuda/bin/nvcc -arch=sm_37 --device-c -std=c++11 -cudart=shared -rdc=true main.o mtx.o mpamp_for_loop_funs.o cuBLAS_funs.o sparsify_threshold.o /usr/local/cuda/lib64/*.so -lcutil -lcudpp -lcuda -lcudart -lcurand -o mpamp
nvcc fatal   : A single input file is required for a non-link phase when an outputfile is specified
make: *** [all] Error 1

In addition, when I remove -o mpamp, the make all command works, but does not generate an output file to then execute.

Does anyone have any tips on getting past this error?

I recently moved from Visual Studio in Windows to a Linux machine. VS did the compiling and linking 'automatically' (I added slashes and newlines):

nvcc -dlink -o x64\Debug\MPAMP.device-link.obj -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MTd " \
-L"\lib\x64" cublas.lib cublas_device.lib cudadevrt.lib curand.lib 
cudart.lib cudart_static.lib \
kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib \
odbc32.lib odbccp32.lib  -gencode=arch=compute_35,code=sm_35 -G \
--machine 64 x64\Debug\cuBLAS_funs.cu.obj x64\Debug\inner_loop.cu.obj x64\Debug\main.cu.obj \
x64\Debug\mpamp_for_loop_funs.cu.obj x64\Debug\mtx.cu.obj x64\Debug\sparsify_threshold.cu.obj

Update: Per the answer below, the pertinent makefile lines are now:

# Compile-phase flags, used by the per-file rules that turn each .cu into a .o.
NVCCFLAGS := -arch=sm_37 --device-c -std=c++11

# Link-phase flags: no --device-c here.
NVCCLFLAGS := -arch=sm_37 -std=c++11 -cudart=shared -rdc=true

# Header and library search paths handed to nvcc on the link line.
# NOTE(review): $(CUDAPATH) is not the variable defined earlier (CUDA_PATH),
# so it expands to nothing -- the command echoed below shows -L/lib64.
LDFLAGS := -I$(CUDA_PATH)/include -L$(CUDAPATH)/lib64

# Toolkit libraries named by full path.
LIBPATH := $(CUDA_PATH)/lib64
SOLIBS := $(LIBPATH)/libcublas.so $(LIBPATH)/libcurand.so $(LIBPATH)/libcudart.so

# Object files that are linked into the mpamp executable.
OBJS := main.o mtx.o mpamp_for_loop_funs.o cuBLAS_funs.o sparsify_threshold.o

# Libraries requested by name on the link line.
LIBS := -lcutil -lcudpp -lcuda -lcudart -lcurand -lcublas

########################################################################

# link
# `all` names a command, not a file this rule creates.
.PHONY: all

# Prerequisites are $(OBJS): make variable names are case-sensitive, and an
# unknown name such as $(OBJs) silently expands to nothing, which would leave
# the objects out of the dependency graph.
all: $(OBJS)
	$(NVCC) $(NVCCLFLAGS) $(LDFLAGS) -o mpamp $(OBJS) $(SOLIBS) $(LIBS)

But, I receive the following error:

ece$ make all
/usr/local/cuda/bin/nvcc -arch=sm_37 -std=c++11 -cudart=shared -rdc=true -I/usr/local/cuda/include -L/lib64 -o mpamp main.o mtx.o mpamp_for_loop_funs.o cuBLAS_funs.o sparsify_threshold.o /usr/local/cuda/lib64/libcublas.so /usr/local/cuda/lib64/libcurand.so /usr/local/cuda/lib64/libcudart.so -lcutil -lcudpp -lcuda -lcudart -lcurand -lcublas
nvlink error   : Undefined reference to 'cublasDgemm_v2' in 'cuBLAS_funs.o'
make: *** [all] Error 255

I have tried rearranging the flags in the linking statement as per this question, to no avail.

Solved! Thanks for your help. Here are the final changes:

LIBPATH := $(CUDA_PATH)/lib64
# Export the toolkit library directory to the commands that recipes run.
# Inside a makefile a shell-style $LD_LIBRARY_PATH is parsed as $(L) followed
# by the literal text D_LIBRARY_PATH, which discards the inherited value.
# $(LD_LIBRARY_PATH) reads the value make imported from the environment, and
# := lets the variable refer to its own previous value.
# The $(if ...) omits the separator when the inherited value is empty, so the
# result never starts with ':' (an empty entry, which the dynamic loader
# treats as the current directory).
export LD_LIBRARY_PATH := $(if $(LD_LIBRARY_PATH),$(LD_LIBRARY_PATH):)$(LIBPATH)
# Toolkit libraries named by full path.
SOLIBS := $(LIBPATH)/libcublas.so $(LIBPATH)/libcurand.so

# Object files that are linked into the mpamp executable.
OBJS := main.o mtx.o mpamp_for_loop_funs.o cuBLAS_funs.o sparsify_threshold.o

# Libraries requested by name on the link line.
LIBS := -lcuda -lcurand -lcublas -lcublas_device

Solution

The fundamental problem here is that you have jumbled compile and link switches together, attempting to use the same set of switches both for compile and link.

Coupled with this, your code appears to use or depend on CUDA separable compilation with device linking, and it is not possible in that case to use the same set of switches for both compile and link, unless the compile and link phases are all combined, which they are not in your example.

Careful study of the nvcc manual for the switches you are using will identify the issues.

When you specify --device-c, you are indicating to the compiler (nvcc) that this is a compile phase/step only (just as specifying -c would for a gnu compiler toolchain). Therefore specifying that switch for any kind of link process is not sensible.

The solution with the smallest number of changes is to remove --device-c from your link-phase command. One possible approach would be to create an additional Makefile variable:

# Link-phase flags: the compile flags with --device-c left out.
NVCCLFLAGS := -arch=sm_37 -std=c++11 -cudart=shared -rdc=true

and modify your link phase command to use that:

# link
# `all` names a command, not a file this rule creates.
.PHONY: all

# Prerequisites are $(OBJS): make variable names are case-sensitive, and an
# unknown name such as $(OBJs) silently expands to nothing, so the objects
# would not be (re)built before the link step.
all: $(OBJS)
	$(NVCC) $(NVCCLFLAGS) $(OBJS) $(SOLIBS) $(LIBS) -o mpamp