Loading Makefile +9 −6 Original line number Diff line number Diff line Loading @@ -59,9 +59,9 @@ OPT += -DPHASE_ON #OPT += -DNORMALIZE_UVW # Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL #OPT += -DGAUSS_HI_PRECISION OPT += -DGAUSS_HI_PRECISION OPT += -DGAUSS #OPT += -DGAUSS #OPT += -DKAISERBESSEL Loading @@ -77,7 +77,10 @@ OPT += -DGAUSS #OPT += -DCUDACC # use GPU acceleration via OMP #OPT += -DACCOMP OPT += -DACCOMP # perform stacking on GPUs #OPT += -DGPU_STACKING # use NVIDIA GPU to perform the reduce #OPT += -DNCCL_REDUCE Loading Loading @@ -135,8 +138,8 @@ OBJ_ACC_CUDA = phase_correction.o w-stacking.o # ----- define which files will be compiled by NVC with OMP offloading for wither Nvidia or AMD # DEPS_ACC_OMP = w-stacking_omp.h phase_correction.c w-stacking_omp.c OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o DEPS_ACC_OMP = w-stacking.h phase_correction.c w-stacking.c OBJ_ACC_OMP = phase_correction.o w-stacking.o # ----- define what files will be compiled by NVC with OMP offloading when the stacking reduce is Loading Loading @@ -174,7 +177,7 @@ endif ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT))) CFLAGS += $(FFTW_MPI_INC) ifeq (HIBRYD_FFTW,$(findstring HYBRID_FFTW,$(OPT))) ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT))) FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm else FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm Loading w-stacking.cu +2 −2 Original line number Diff line number Diff line Loading @@ -325,7 +325,7 @@ void wstack( omp_set_num_threads(num_threads); #endif #ifdef ACCOMP #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); uint Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) Loading Loading @@ -420,7 +420,7 @@ void wstack( } } #ifdef ACCOMP #if defined(ACCOMP) && (GPU_STACKING) #pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan], grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #endif // End switch between CUDA and CPU versions Loading Loading
Makefile +9 −6 Original line number Diff line number Diff line Loading @@ -59,9 +59,9 @@ OPT += -DPHASE_ON #OPT += -DNORMALIZE_UVW # Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL #OPT += -DGAUSS_HI_PRECISION OPT += -DGAUSS_HI_PRECISION OPT += -DGAUSS #OPT += -DGAUSS #OPT += -DKAISERBESSEL Loading @@ -77,7 +77,10 @@ OPT += -DGAUSS #OPT += -DCUDACC # use GPU acceleration via OMP #OPT += -DACCOMP OPT += -DACCOMP # perform stacking on GPUs #OPT += -DGPU_STACKING # use NVIDIA GPU to perform the reduce #OPT += -DNCCL_REDUCE Loading Loading @@ -135,8 +138,8 @@ OBJ_ACC_CUDA = phase_correction.o w-stacking.o # ----- define which files will be compiled by NVC with OMP offloading for wither Nvidia or AMD # DEPS_ACC_OMP = w-stacking_omp.h phase_correction.c w-stacking_omp.c OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o DEPS_ACC_OMP = w-stacking.h phase_correction.c w-stacking.c OBJ_ACC_OMP = phase_correction.o w-stacking.o # ----- define what files will be compiled by NVC with OMP offloading when the stacking reduce is Loading Loading @@ -174,7 +177,7 @@ endif ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT))) CFLAGS += $(FFTW_MPI_INC) ifeq (HIBRYD_FFTW,$(findstring HYBRID_FFTW,$(OPT))) ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT))) FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm else FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm Loading
w-stacking.cu +2 −2 Original line number Diff line number Diff line Loading @@ -325,7 +325,7 @@ void wstack( omp_set_num_threads(num_threads); #endif #ifdef ACCOMP #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); uint Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) Loading Loading @@ -420,7 +420,7 @@ void wstack( } } #ifdef ACCOMP #if defined(ACCOMP) && (GPU_STACKING) #pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan], grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #endif // End switch between CUDA and CPU versions Loading