Choose whether performing stacking on CPUs or GPUs (404e0296) · Commits · Claudio Gheller / HPC_Imaging

Makefile

+9 −6

Original line number	Diff line number	Diff line
		@@ -59,9 +59,9 @@ OPT += -DPHASE_ON
		#OPT += -DNORMALIZE_UVW

		# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
		#OPT += -DGAUSS_HI_PRECISION
		OPT += -DGAUSS_HI_PRECISION

		OPT += -DGAUSS
		#OPT += -DGAUSS

		#OPT += -DKAISERBESSEL

		@@ -77,7 +77,10 @@ OPT += -DGAUSS
		#OPT += -DCUDACC

		# use GPU acceleration via OMP
		#OPT += -DACCOMP
		OPT += -DACCOMP

		# perform stacking on GPUs
		#OPT += -DGPU_STACKING

		# use NVIDIA GPU to perform the reduce
		#OPT += -DNCCL_REDUCE
		@@ -135,8 +138,8 @@ OBJ_ACC_CUDA = phase_correction.o w-stacking.o

		# ----- define which files will be compiled by NVC with OMP offloading for either Nvidia or AMD
		#
		DEPS_ACC_OMP = w-stacking_omp.h phase_correction.c w-stacking_omp.c
		OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o
		DEPS_ACC_OMP = w-stacking.h phase_correction.c w-stacking.c
		OBJ_ACC_OMP = phase_correction.o w-stacking.o


		# ----- define what files will be compiled by NVC with OMP offloading when the stacking reduce is
		@@ -174,7 +177,7 @@ endif

		ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT)))
		CFLAGS += $(FFTW_MPI_INC)
		ifeq (HIBRYD_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
		ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
		FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm
		else
		FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm

+2 −2

Original line number	Diff line number	Diff line
		@@ -325,7 +325,7 @@ void wstack(
		omp_set_num_threads(num_threads);
		#endif

		#ifdef ACCOMP
		#if defined(ACCOMP) && (GPU_STACKING)
		omp_set_default_device(rank % omp_get_num_devices());
		uint Nvis = num_points*freq_per_chan*polarizations;
		#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
		@@ -420,7 +420,7 @@ void wstack(
		}

		}
		#ifdef ACCOMP
		#if defined(ACCOMP) && (GPU_STACKING)
		#pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan], grid[0:2*num_w_planes*grid_size_x*grid_size_y])
		#endif
		// End switch between CUDA and CPU versions