# =====================================================================
# Build/Makefile.local   (diff summary: +24 −0)
# Site-local toolchain paths. Lines starting with "# @@" are diff hunk
# headers; the text after the second "@@" is the hunk's context heading
# taken from the file, kept here only as a position marker.
# =====================================================================

# @@ -13,10 +13,32 @@ CUDA_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/lib64
FFTW_INCL=
FFTW_LIB=

##########################################################
#NVIDIA CUFFTMP
# The "/..." prefixes are placeholders: set them to the local cuFFTMp
# installation before building with the CUFFTMP option.

CUFFTMP_LIB = -L/.../lib64
CUFFTMP_INCL = -I/.../include/cufftmp
##########################################################

##########################################################
#NVIDIA NCCL REDUCE
# NOTE(review): these paths point at HPC SDK 23.3 while NVC/NVCC/CUDA_LIB
# point at 23.1 -- confirm the mix is intentional.

NCCL_INC = -I/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/include
NCCL_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/lib
##########################################################

NVC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/compilers/bin/nvc
NVFLAGS = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2

# Base CUDA runtime flags.
NVLIB = $(CUDA_INC) $(CUDA_LIB) -lcuda -lcudart

# Flags for the cuFFTMp FFT (CUDA + MPI + NVSHMEM + cuFFTMp).
# $(CUFFTMP_INCL)/$(CUFFTMP_LIB) are added so the cuFFTMp paths defined above
# actually reach the command line that passes -lcufftMp; previously they were
# defined but never referenced. $(CUFFT_INC)/$(CUFFT_LIB) are kept in case they
# are defined outside this hunk (they expand to nothing otherwise).
NVLIB_2 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) \
          $(CUFFT_INC) $(CUFFT_LIB) $(CUFFTMP_INCL) $(CUFFTMP_LIB) \
          $(NVSHMEM_INC) $(NVSHMEM_LIB) \
          -lnvshmem_host -lnvshmem_device -lcuda -lcudart -lcufftMp

# Flags for the NCCL reduce (CUDA + MPI + NCCL).
NVLIB_3 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(NCCL_INC) $(NCCL_LIB) -lcuda -lcudart -lnccl

NVCC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/bin/nvcc
OPT_NVCC = -std=c++17 --generate-code arch=compute_86,code=sm_86

# @@ -25,6 +47,7 @@ CFLAGS +=
MPICHLIB =

##########################################################
#AMD GPUs (DEFAULT = LUMI)

# @@ -41,3 +64,4 @@ HIP_LIB= -L/opt/rocm-5.2.3/hip/lib
AMDLIB = $(HIP_INCL) $(HIP_LIB) $(RCCL_INCL) $(RCCL_LIB) -D__HIP_PLATFORM_AMD__ -lamdhip64 -lrccl
###########################################################

# =====================================================================
# Makefile   (diff summary: +19 −3)
# =====================================================================

# @@ -110,7 +110,7 @@ DEPS = w-stacking.h main.c allvars.h
# ----- define which files will be compiled by MPICC
#
# these are the OBJS that will be compiled by C compiler if no acceleration (neither with CUDA nor with OpenMP) is provided
CC_OBJ_NOACC = allvars.o main.o init.o gridding.o gridding_cpu.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o

# these are the OBJs that will be compiled by the normal MPICC compiler if GPU acceleration is switched on

# @@ -136,6 +136,12 @@ OBJ_NCCL_REDUCE = gridding_nccl.o
DEPS_RCCL_REDUCE = gridding_rccl.cpp
OBJ_RCCL_REDUCE = gridding_rccl.o

# ----- define what files will be compiled by NVCC for Nvidia cufftMP implementation of FFT
#
DEPS_ACC_CUFFTMP = w-stacking.h cuda_fft.cu
OBJ_ACC_CUFFTMP = cuda_fft.o

# -----------------------------------------------------
#
# end of OBJ definition

# @@ -240,6 +246,16 @@ $(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE)
# The next two lines are trailing context of the RCCL block; its opening
# conditional and recipe lie above this hunk.
OBJ += $(OBJ_RCCL_REDUCE)
endif

# cuFFTMp FFT: enabled when the string CUFFTMP appears anywhere in $(OPT).
# NOTE(review): LINKER, FLAGS and LIBS are assigned with "=", replacing any
# value set by an earlier option block -- confirm this is intended when
# several GPU options are combined.
ifeq (CUFFTMP,$(findstring CUFFTMP,$(OPT)))
EXEC_EXT := $(EXEC_EXT)_acc-fft
LINKER=$(MPIC++)
FLAGS=$(OPTIMIZE)
LIBS=$(NVLIB) $(NVLIB_2)
# w-stacking.h stays a prerequisite so that editing it triggers a rebuild, but
# only the .cu source is passed to nvcc: $^ would also hand the header to the
# compiler as an input file. -o $@ makes the recipe's output match its target.
$(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP)
	$(NVCC) $(OPT) $(OPT_NVCC) $(CFLAGS) -c $(filter %.cu,$^) -o $@ $(LIBS)
OBJ += $(OBJ_ACC_CUFFTMP)
endif

###################################################################################
# =====================================================================
# Build/Makefile.local   (diff summary: +24 −0)
# Site-local toolchain paths. Lines starting with "# @@" are diff hunk
# headers; the text after the second "@@" is the hunk's context heading
# taken from the file, kept here only as a position marker.
# =====================================================================

# @@ -13,10 +13,32 @@ CUDA_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/lib64
FFTW_INCL=
FFTW_LIB=

##########################################################
#NVIDIA CUFFTMP
# The "/..." prefixes are placeholders: set them to the local cuFFTMp
# installation before building with the CUFFTMP option.

CUFFTMP_LIB = -L/.../lib64
CUFFTMP_INCL = -I/.../include/cufftmp
##########################################################

##########################################################
#NVIDIA NCCL REDUCE
# NOTE(review): these paths point at HPC SDK 23.3 while NVC/NVCC/CUDA_LIB
# point at 23.1 -- confirm the mix is intentional.

NCCL_INC = -I/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/include
NCCL_LIB = -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/comm_libs/nccl/lib
##########################################################

NVC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/compilers/bin/nvc
NVFLAGS = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2

# Base CUDA runtime flags.
NVLIB = $(CUDA_INC) $(CUDA_LIB) -lcuda -lcudart

# Flags for the cuFFTMp FFT (CUDA + MPI + NVSHMEM + cuFFTMp).
# $(CUFFTMP_INCL)/$(CUFFTMP_LIB) are added so the cuFFTMp paths defined above
# actually reach the command line that passes -lcufftMp; previously they were
# defined but never referenced. $(CUFFT_INC)/$(CUFFT_LIB) are kept in case they
# are defined outside this hunk (they expand to nothing otherwise).
NVLIB_2 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) \
          $(CUFFT_INC) $(CUFFT_LIB) $(CUFFTMP_INCL) $(CUFFTMP_LIB) \
          $(NVSHMEM_INC) $(NVSHMEM_LIB) \
          -lnvshmem_host -lnvshmem_device -lcuda -lcudart -lcufftMp

# Flags for the NCCL reduce (CUDA + MPI + NCCL).
NVLIB_3 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(NCCL_INC) $(NCCL_LIB) -lcuda -lcudart -lnccl

NVCC = /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/bin/nvcc
OPT_NVCC = -std=c++17 --generate-code arch=compute_86,code=sm_86

# @@ -25,6 +47,7 @@ CFLAGS +=
MPICHLIB =

##########################################################
#AMD GPUs (DEFAULT = LUMI)

# @@ -41,3 +64,4 @@ HIP_LIB= -L/opt/rocm-5.2.3/hip/lib
AMDLIB = $(HIP_INCL) $(HIP_LIB) $(RCCL_INCL) $(RCCL_LIB) -D__HIP_PLATFORM_AMD__ -lamdhip64 -lrccl
###########################################################
# =====================================================================
# Makefile   (diff summary: +19 −3)
# Lines starting with "# @@" are diff hunk headers; the text after the
# second "@@" is the hunk's context heading taken from the file, kept
# here only as a position marker.
# =====================================================================

# @@ -110,7 +110,7 @@ DEPS = w-stacking.h main.c allvars.h
# ----- define which files will be compiled by MPICC
#
# these are the OBJS that will be compiled by C compiler if no acceleration (neither with CUDA nor with OpenMP) is provided
CC_OBJ_NOACC = allvars.o main.o init.o gridding.o gridding_cpu.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o

# these are the OBJs that will be compiled by the normal MPICC compiler if GPU acceleration is switched on

# @@ -136,6 +136,12 @@ OBJ_NCCL_REDUCE = gridding_nccl.o
DEPS_RCCL_REDUCE = gridding_rccl.cpp
OBJ_RCCL_REDUCE = gridding_rccl.o

# ----- define what files will be compiled by NVCC for Nvidia cufftMP implementation of FFT
#
DEPS_ACC_CUFFTMP = w-stacking.h cuda_fft.cu
OBJ_ACC_CUFFTMP = cuda_fft.o

# -----------------------------------------------------
#
# end of OBJ definition

# @@ -240,6 +246,16 @@ $(OBJ_RCCL_REDUCE): $(DEPS_RCCL_REDUCE)
# The next two lines are trailing context of the RCCL block; its opening
# conditional and recipe lie above this hunk.
OBJ += $(OBJ_RCCL_REDUCE)
endif

# cuFFTMp FFT: enabled when the string CUFFTMP appears anywhere in $(OPT).
# NOTE(review): LINKER, FLAGS and LIBS are assigned with "=", replacing any
# value set by an earlier option block -- confirm this is intended when
# several GPU options are combined.
ifeq (CUFFTMP,$(findstring CUFFTMP,$(OPT)))
EXEC_EXT := $(EXEC_EXT)_acc-fft
LINKER=$(MPIC++)
FLAGS=$(OPTIMIZE)
LIBS=$(NVLIB) $(NVLIB_2)
# w-stacking.h stays a prerequisite so that editing it triggers a rebuild, but
# only the .cu source is passed to nvcc: $^ would also hand the header to the
# compiler as an input file. -o $@ makes the recipe's output match its target.
$(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP)
	$(NVCC) $(OPT) $(OPT_NVCC) $(CFLAGS) -c $(filter %.cu,$^) -o $@ $(LIBS)
OBJ += $(OBJ_ACC_CUFFTMP)
endif

###################################################################################