Loading Makefile +33 −9 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ FFTWLIBS = OPT += -DUSE_FFTW # use omp-ized version of fftw routines OPT += -DHYBRID_FFTW #OPT += -DHYBRID_FFTW # write the full 3D cube of gridded visibilities and its FFT transform #OPT += -DWRITE_DATA Loading Loading @@ -73,10 +73,10 @@ OPT += -DPHASE_ON #OPT += -DNVIDIA #use cuda for GPUs #OPT += -DCUDACC OPT += -DCUDACC # use GPU acceleration via OMP OPT += -DACCOMP #OPT += -DACCOMP # use NVIDIA GPU to perform the reduce #OPT += -DNCCL_REDUCE Loading @@ -85,7 +85,7 @@ OPT += -DACCOMP #OPT += -DRCCL_REDUCE # use GPU to perform FFT #OPT += -DCUFFTMP OPT += -DCUFFTMP #support for AMD GPUs #OPT += __HIP_PLATFORM_AMD__ Loading Loading @@ -141,8 +141,13 @@ OBJ_RCCL_REDUCE = gridding_rccl.o # ----- define what files will be compiled by NVCC for Nvidia cufftMP implementation of FFT # DEPS_ACC_CUFFTMP = w-stacking_omp.h cuda_fft.cpp ifeq (CUDACC,$(findstring CUDACC,$(OPT))) DEPS_ACC_CUFFTMP = cuda_fft.cu OBJ_ACC_CUFFTMP = cuda_fft.o else DEPS_ACC_CUFFTMP = cuda_fft.cpp OBJ_ACC_CUFFTMP = cuda_fft.o endif # ----------------------------------------------------- Loading Loading @@ -173,13 +178,19 @@ w-stacking.c: w-stacking.cu phase_correction.c: phase_correction.cu cp phase_correction.cu phase_correction.c cuda_fft.cpp: cuda_fft.cu cp cuda_fft.cu cuda_fft.cpp else w-stacking.c: w-stacking.cu rm -f w-stacking.cun rm -f w-stacking.c touch w-stacking.c phase_correction.c: phase_correction.cu rm -f phase_correction.c touch phase_correction.c cuda_fft.cpp: cuda_fft.cu rm -f cuda_fft.cpp touch cuda_fft.cpp endif Loading Loading @@ -250,15 +261,28 @@ OBJ += $(OBJ_RCCL_REDUCE) endif ifeq (CUFFTMP,$(findstring CUFFTMP,$(OPT))) ifeq (CUDACC,$(findstring CUDACC,$(OPT))) EXEC_EXT := $(EXEC_EXT)_acc-fft LINKER=$(MPIC++) FLAGS=$(OPTIMIZE) LIBS=$(NVLIB_2) $(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP) $(NVCC) $(OPT_NVCC) $(OPT) -c $^ $(LIBS) OBJ += $(OBJ_ACC_CUFFTMP) else EXEC_EXT := $(EXEC_EXT)_acc-fft LINKER=$(NVC++) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) $(NVLIB_2) LIBS=$(NVLIB_2) $(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP) $(NVC++) $(FLAGS) $(OPT) -c $^ $(LIBS) OBJ += $(OBJ_ACC_CUFFTMP) endif endif ################################################################################### Loading @@ -270,11 +294,11 @@ w-stacking: $(OBJ) $(DEPS) Makefile %.o: %.c $(DEPS) $(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS) clean: rm -f *.o rm -f w-stacking.c rm -f phase_correction.c rm -f cuda_fft.cpp cleanall: rm -f $(EXEC)$(EXT) Loading allvars.h +0 −3 Original line number Diff line number Diff line Loading @@ -34,9 +34,6 @@ #include "w-stacking.h" #endif #if defined(CUDACC) #include <cuda.h> #endif #if defined(NVIDIA) #include <cuda_runtime.h> Loading allvars_nccl.h +1 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #include <unistd.h> #if !defined( NCCL_REDUCE ) #if !defined( NCCL_REDUCE ) && !defined(__CUDACC__) #include <stdatomic.h> #endif Loading main.c +0 −20 Original line number Diff line number Diff line Loading @@ -3,7 +3,6 @@ #include "allvars.h" #include "proto.h" void shutdown_wstacking( int errcode, char *message, char *fname, int linenum ) { if ( ( rank == 0 ) && Loading Loading @@ -77,25 +76,6 @@ int main(int argc, char * argv[]) FFT_INIT; #if defined(CUDACC) int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); if ( rank == 0 ) { if (0 == ndevices) { shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ ); } printf("Running rank %d/%d using GPU %d\n", rank, size, rank % ndevices); #ifdef NVIDIA prtAccelInfo(); #endif } #endif #ifdef ACCOMP if ( rank == 0 ) { if (0 == omp_get_num_devices()) { Loading phase_correction.cu +18 −1 Original line number Diff line number Diff line Loading @@ -11,6 +11,8 @@ #include <math.h> #include <stdlib.h> #include <stdio.h> #include "errcodes.h" #include "proto.h" #ifdef __CUDACC__ Loading Loading @@ -107,6 +109,21 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); if ( rank == 0 ) { if (0 == ndevices) { shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ ); } } printf("Running rank %d using GPU %d\n", rank, rank % ndevices); #ifdef NVIDIA prtAccelInfo(); #endif cudaError_t mmm; double * image_real_g; Loading Loading
Makefile +33 −9 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ FFTWLIBS = OPT += -DUSE_FFTW # use omp-ized version of fftw routines OPT += -DHYBRID_FFTW #OPT += -DHYBRID_FFTW # write the full 3D cube of gridded visibilities and its FFT transform #OPT += -DWRITE_DATA Loading Loading @@ -73,10 +73,10 @@ OPT += -DPHASE_ON #OPT += -DNVIDIA #use cuda for GPUs #OPT += -DCUDACC OPT += -DCUDACC # use GPU acceleration via OMP OPT += -DACCOMP #OPT += -DACCOMP # use NVIDIA GPU to perform the reduce #OPT += -DNCCL_REDUCE Loading @@ -85,7 +85,7 @@ OPT += -DACCOMP #OPT += -DRCCL_REDUCE # use GPU to perform FFT #OPT += -DCUFFTMP OPT += -DCUFFTMP #support for AMD GPUs #OPT += __HIP_PLATFORM_AMD__ Loading Loading @@ -141,8 +141,13 @@ OBJ_RCCL_REDUCE = gridding_rccl.o # ----- define what files will be compiled by NVCC for Nvidia cufftMP implementation of FFT # DEPS_ACC_CUFFTMP = w-stacking_omp.h cuda_fft.cpp ifeq (CUDACC,$(findstring CUDACC,$(OPT))) DEPS_ACC_CUFFTMP = cuda_fft.cu OBJ_ACC_CUFFTMP = cuda_fft.o else DEPS_ACC_CUFFTMP = cuda_fft.cpp OBJ_ACC_CUFFTMP = cuda_fft.o endif # ----------------------------------------------------- Loading Loading @@ -173,13 +178,19 @@ w-stacking.c: w-stacking.cu phase_correction.c: phase_correction.cu cp phase_correction.cu phase_correction.c cuda_fft.cpp: cuda_fft.cu cp cuda_fft.cu cuda_fft.cpp else w-stacking.c: w-stacking.cu rm -f w-stacking.cun rm -f w-stacking.c touch w-stacking.c phase_correction.c: phase_correction.cu rm -f phase_correction.c touch phase_correction.c cuda_fft.cpp: cuda_fft.cu rm -f cuda_fft.cpp touch cuda_fft.cpp endif Loading Loading @@ -250,15 +261,28 @@ OBJ += $(OBJ_RCCL_REDUCE) endif ifeq (CUFFTMP,$(findstring CUFFTMP,$(OPT))) ifeq (CUDACC,$(findstring CUDACC,$(OPT))) EXEC_EXT := $(EXEC_EXT)_acc-fft LINKER=$(MPIC++) FLAGS=$(OPTIMIZE) LIBS=$(NVLIB_2) $(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP) $(NVCC) $(OPT_NVCC) $(OPT) -c $^ $(LIBS) OBJ += $(OBJ_ACC_CUFFTMP) else EXEC_EXT := $(EXEC_EXT)_acc-fft LINKER=$(NVC++) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) $(NVLIB_2) LIBS=$(NVLIB_2) $(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP) $(NVC++) $(FLAGS) $(OPT) -c $^ $(LIBS) OBJ += $(OBJ_ACC_CUFFTMP) endif endif ################################################################################### Loading @@ -270,11 +294,11 @@ w-stacking: $(OBJ) $(DEPS) Makefile %.o: %.c $(DEPS) $(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS) clean: rm -f *.o rm -f w-stacking.c rm -f phase_correction.c rm -f cuda_fft.cpp cleanall: rm -f $(EXEC)$(EXT) Loading
allvars.h +0 −3 Original line number Diff line number Diff line Loading @@ -34,9 +34,6 @@ #include "w-stacking.h" #endif #if defined(CUDACC) #include <cuda.h> #endif #if defined(NVIDIA) #include <cuda_runtime.h> Loading
allvars_nccl.h +1 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ #include <unistd.h> #if !defined( NCCL_REDUCE ) #if !defined( NCCL_REDUCE ) && !defined(__CUDACC__) #include <stdatomic.h> #endif Loading
main.c +0 −20 Original line number Diff line number Diff line Loading @@ -3,7 +3,6 @@ #include "allvars.h" #include "proto.h" void shutdown_wstacking( int errcode, char *message, char *fname, int linenum ) { if ( ( rank == 0 ) && Loading Loading @@ -77,25 +76,6 @@ int main(int argc, char * argv[]) FFT_INIT; #if defined(CUDACC) int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); if ( rank == 0 ) { if (0 == ndevices) { shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ ); } printf("Running rank %d/%d using GPU %d\n", rank, size, rank % ndevices); #ifdef NVIDIA prtAccelInfo(); #endif } #endif #ifdef ACCOMP if ( rank == 0 ) { if (0 == omp_get_num_devices()) { Loading
phase_correction.cu +18 −1 Original line number Diff line number Diff line Loading @@ -11,6 +11,8 @@ #include <math.h> #include <stdlib.h> #include <stdio.h> #include "errcodes.h" #include "proto.h" #ifdef __CUDACC__ Loading Loading @@ -107,6 +109,21 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); if ( rank == 0 ) { if (0 == ndevices) { shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ ); } } printf("Running rank %d using GPU %d\n", rank, rank % ndevices); #ifdef NVIDIA prtAccelInfo(); #endif cudaError_t mmm; double * image_real_g; Loading