CUFFTMP included (fa399562) · Commits · Claudio Gheller / HPC_Imaging

Build/Makefile.local

+24 −0

Original line number	Diff line number	Diff line
		@@ -13,10 +13,32 @@ CUDA_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/lib64
		FFTW_INCL=
		FFTW_LIB=


		##########################################################
		#NVIDIA CUFFTMP
		#
		# Linker search path (-L) and header search path (-I) for NVIDIA cuFFTMp.
		# NOTE(review): the "/.../" prefixes look like placeholders — replace them
		# with the local cuFFTMp installation directory before building.

		CUFFTMP_LIB = -L/.../lib64
		CUFFTMP_INCL = -I/.../include/cufftmp
		##########################################################


		##########################################################
		#NVIDIA NCCL REDUCE
		#
		# Header (-I) and library (-L) search paths for NCCL.
		# NOTE(review): these point into HPC SDK 23.3, whereas the CUDA, nvc and
		# nvcc paths in this file use 23.1 — confirm the version mix is intended.

		NCCL_INC = -I/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/include
		NCCL_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/lib
		##########################################################

		# NVIDIA HPC SDK C compiler (nvc), its optimisation flags, and the basic
		# CUDA driver/runtime link line (-lcuda -lcudart).
		# OMP_GPU and CUDA_INC are not set in the lines shown here — presumably
		# defined elsewhere; verify before relying on them.
		NVC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/compilers/bin/nvc
		NVFLAGS = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2
		NVLIB = $(CUDA_INC) $(CUDA_LIB) -lcuda -lcudart


		# Compile/link line for the cuFFTMp-based FFT: CUDA, MPI, cuFFTMp and
		# NVSHMEM search paths plus the libraries themselves.
		# CUFFTMP_INCL / CUFFTMP_LIB are the cuFFTMp paths configured in this file;
		# they must be listed here, otherwise -lcufftMp and its headers are only
		# looked up in the default locations. CUFFT_INC / CUFFT_LIB are kept for
		# backward compatibility (an unset make variable expands to nothing).
		NVLIB_2 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(CUFFT_INC) $(CUFFT_LIB) $(CUFFTMP_INCL) $(CUFFTMP_LIB) $(NVSHMEM_INC) $(NVSHMEM_LIB) -lnvshmem_host -lnvshmem_device -lcuda -lcudart -lcufftMp

		# Compile/link line for the NCCL reduce: CUDA, MPI and NCCL search paths
		# plus the CUDA driver/runtime and NCCL libraries.
		NVLIB_3 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(NCCL_INC) $(NCCL_LIB) -lcuda -lcudart -lnccl


		# CUDA compiler driver and its options: C++17, generating code for
		# compute capability 8.6 (compute_86 / sm_86). Adjust the arch/code pair
		# when targeting a different GPU generation.
		NVCC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/bin/nvcc
		OPT_NVCC = -std=c++17 --generate-code arch=compute_86,code=sm_86

		@@ -25,6 +47,7 @@ CFLAGS +=
		MPICHLIB =



		##########################################################
		#AMD GPUs (DEFAULT = LUMI)

		@@ -41,3 +64,4 @@ HIP_LIB= -L/opt/rocm-5.2.3/hip/lib

		AMDLIB = $(HIP_INCL) $(HIP_LIB) $(RCCL_INCL) $(RCCL_LIB) -D__HIP_PLATFORM_AMD__ -lamdhip64 -lrccl
		###########################################################

Makefile

+19 −3

Original line number	Diff line number	Diff line
		@@ -110,7 +110,7 @@ DEPS = w-stacking.h main.c allvars.h

		# ----- define which files will be compiled by MPICC
		#
		# these are the OBJS that will be compiled by C compiler if no acceleration (neither with OpenACC nor with OpenMP) is provided
		# these are the OBJS that will be compiled by C compiler if no acceleration (neither with CUDA nor with OpenMP) is provided
		# Object list for the non-accelerated build, grouped over several lines;
		# backslash-newline collapses to a single space, so the value is unchanged.
		CC_OBJ_NOACC = allvars.o main.o init.o \
		               gridding.o gridding_cpu.o fourier_transform.o \
		               result.o numa.o reduce.o \
		               w-stacking.o phase_correction.o

		# these are the OBJs that will be compiled by the normal MPICC compiler if GPU acceleration is switched on
		@@ -136,6 +136,12 @@ OBJ_NCCL_REDUCE = gridding_nccl.o
		DEPS_RCCL_REDUCE = gridding_rccl.cpp
		OBJ_RCCL_REDUCE = gridding_rccl.o

		# ----- define what files will be compiled by NVCC for the NVIDIA cuFFTMp implementation of the FFT
		#
		# DEPS_ACC_CUFFTMP lists the prerequisites (header + CUDA source) of the
		# object named in OBJ_ACC_CUFFTMP; the rule using both is only active
		# when OPT contains CUFFTMP.
		DEPS_ACC_CUFFTMP = w-stacking.h cuda_fft.cu
		OBJ_ACC_CUFFTMP = cuda_fft.o


		# -----------------------------------------------------
		#
		# end of OBJ definition
		@@ -240,6 +246,16 @@ $(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE)
		OBJ += $(OBJ_RCCL_REDUCE)
		endif

		# cuFFTMp-accelerated FFT: enabled when OPT contains the string CUFFTMP.
		# Appends "_acc-fft" to the executable suffix, selects the MPI C++ compiler
		# as linker, and builds the CUDA FFT object with NVCC.
		ifeq (CUFFTMP,$(findstring CUFFTMP,$(OPT)))
		EXEC_EXT := $(EXEC_EXT)_acc-fft
		LINKER=$(MPIC++)
		FLAGS=$(OPTIMIZE)
		LIBS=$(NVLIB) $(NVLIB_2)
		# Headers in DEPS_ACC_CUFFTMP act as prerequisites only (editing them
		# triggers a rebuild). Only the .cu sources are handed to NVCC, because
		# header files are not among its supported compile inputs; -o $@ makes the
		# produced file match the rule's target explicitly.
		$(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP)
			$(NVCC) $(OPT) $(OPT_NVCC) $(CFLAGS) -c $(filter %.cu,$^) -o $@ $(LIBS)
		OBJ += $(OBJ_ACC_CUFFTMP)
		endif


		###################################################################################