Commit fa399562 authored by Giovanni Lacopo
Browse files

CUFFTMP included

parent feb9a976
Loading
Loading
Loading
Loading
+24 −0
Original line number Diff line number Diff line
@@ -13,10 +13,32 @@ CUDA_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/lib64
# FFTW include and library flags are left empty in this configuration.
# NOTE(review): presumably the compiler wrapper or environment supplies FFTW — confirm.
FFTW_INCL=
FFTW_LIB=


##########################################################
#NVIDIA CUFFTMP
#
# Library and header search paths for Nvidia cuFFTMp.
# NOTE(review): "/.../" looks like a placeholder prefix — presumably it has to be
# replaced with the local cuFFTMp installation directory before use; confirm.

CUFFTMP_LIB  = -L/.../lib64
CUFFTMP_INCL = -I/.../include/cufftmp
##########################################################


##########################################################
#NVIDIA NCCL REDUCE
#
# Header and library search paths for NCCL.
# NOTE(review): these paths point into HPC SDK 23.3, while the NVC and NVCC
# paths in this file use 23.1 — confirm the version mix is intentional.

NCCL_INC = -I/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/include
NCCL_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/lib
##########################################################

# NVIDIA HPC SDK C compiler, its optimisation flags, and the CUDA libraries it links.
# NOTE: whitespace after the last word of NVC and NVFLAGS is part of the value in make.
NVC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/compilers/bin/nvc 
# $(OMP_GPU) is not defined in this section; it is expected to be set elsewhere.
NVFLAGS = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2  
# CUDA search paths followed by the CUDA driver (-lcuda) and runtime (-lcudart) libraries.
NVLIB = $(CUDA_INC) $(CUDA_LIB) -lcuda -lcudart


# Flags for the cuFFTMp FFT build: CUDA, MPI, cuFFT(Mp) and NVSHMEM search paths
# plus the libraries to link.
# $(CUFFTMP_INCL) and $(CUFFTMP_LIB) are the cuFFTMp search paths defined in this
# file; without them the -lcufftMp request has no matching search path.
# $(CUFFT_INC) and $(CUFFT_LIB) are kept for configurations that define those names.
NVLIB_2 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(CUFFT_INC) $(CUFFT_LIB) $(CUFFTMP_INCL) $(CUFFTMP_LIB) $(NVSHMEM_INC) $(NVSHMEM_LIB) -lnvshmem_host -lnvshmem_device -lcuda -lcudart -lcufftMp

# Flags for the NCCL build: CUDA, MPI and NCCL search paths plus the libraries to link.
NVLIB_3 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(NCCL_INC) $(NCCL_LIB) -lcuda -lcudart -lnccl


# CUDA compiler driver and its options: C++17, with code generated for
# compute capability 8.6 (compute_86 / sm_86) only.
NVCC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/bin/nvcc
OPT_NVCC   = -std=c++17 --generate-code arch=compute_86,code=sm_86 

@@ -25,6 +47,7 @@ CFLAGS +=
# No additional MPICH library flags are set in this configuration.
MPICHLIB =



##########################################################
#AMD GPUs (DEFAULT = LUMI)

@@ -41,3 +64,4 @@ HIP_LIB= -L/opt/rocm-5.2.3/hip/lib

# Flags for AMD GPU builds: HIP and RCCL search paths, the __HIP_PLATFORM_AMD__
# preprocessor define, and the HIP runtime (-lamdhip64) and RCCL (-lrccl) libraries.
AMDLIB = $(HIP_INCL) $(HIP_LIB) $(RCCL_INCL) $(RCCL_LIB) -D__HIP_PLATFORM_AMD__ -lamdhip64 -lrccl
###########################################################
+19 −3
Original line number Diff line number Diff line
@@ -110,7 +110,7 @@ DEPS = w-stacking.h main.c allvars.h

# ----- define which files will be compiled by MPICC
#
# these are the OBJS that will be compiled by C compiler if no acceleration (neither with OpenACC nor with OpenMP) is provided
# these are the OBJS that will be compiled by the C compiler if no acceleration (neither with CUDA nor with OpenMP) is provided
CC_OBJ_NOACC = allvars.o main.o init.o gridding.o gridding_cpu.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o

# these are the OBJs that will be compiled by the normal MPICC compiler if GPU acceleration is switched on
@@ -136,6 +136,12 @@ OBJ_NCCL_REDUCE = gridding_nccl.o
# Source file and resulting object for the RCCL reduce implementation; they are
# tied together by the $(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE) rule.
DEPS_RCCL_REDUCE = gridding_rccl.cpp
OBJ_RCCL_REDUCE  = gridding_rccl.o

# ----- define which files will be compiled by NVCC for the Nvidia cuFFTMp implementation of the FFT
#
# w-stacking.h is a prerequisite, so modifying it makes cuda_fft.o out of date.
DEPS_ACC_CUFFTMP = w-stacking.h cuda_fft.cu
OBJ_ACC_CUFFTMP  = cuda_fft.o


# -----------------------------------------------------
#
# end of OBJ definition
@@ -240,6 +246,16 @@ $(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE)
OBJ += $(OBJ_RCCL_REDUCE)
endif

# Enable the Nvidia cuFFTMp FFT when OPT contains the string "CUFFTMP".
# Appends "_acc-fft" to the executable suffix, links with $(MPIC++), and sets
# LIBS to NVLIB plus NVLIB_2.
ifeq (CUFFTMP,$(findstring CUFFTMP,$(OPT)))
EXEC_EXT := $(EXEC_EXT)_acc-fft
LINKER=$(MPIC++)
FLAGS=$(OPTIMIZE)
LIBS=$(NVLIB) $(NVLIB_2)
# Only the .cu prerequisites are handed to NVCC: $^ also contains w-stacking.h,
# which is there to trigger rebuilds and is not a compile input.
# $(LIBS) stays on the compile line because it includes the *_INC search-path variables.
$(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP)
	$(NVCC) $(OPT) $(OPT_NVCC) $(CFLAGS) -c $(filter %.cu,$^) $(LIBS)
OBJ += $(OBJ_ACC_CUFFTMP)
endif


###################################################################################