Commit 404e0296 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

Choose whether to perform stacking on CPUs or GPUs

parent ac418ec4
Loading
Loading
Loading
Loading
+9 −6
Original line number Diff line number Diff line
@@ -59,9 +59,9 @@ OPT += -DPHASE_ON
#OPT += -DNORMALIZE_UVW

# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
#OPT += -DGAUSS_HI_PRECISION
OPT += -DGAUSS_HI_PRECISION

OPT += -DGAUSS
#OPT += -DGAUSS

#OPT += -DKAISERBESSEL

@@ -77,7 +77,10 @@ OPT += -DGAUSS
#OPT += -DCUDACC

# use GPU acceleration via OMP 
#OPT += -DACCOMP
OPT += -DACCOMP

# perform stacking on GPUs
#OPT += -DGPU_STACKING

# use NVIDIA GPU to perform the reduce
#OPT += -DNCCL_REDUCE
@@ -135,8 +138,8 @@ OBJ_ACC_CUDA = phase_correction.o w-stacking.o

# ----- define which files will be compiled by NVC with OMP offloading for either Nvidia or AMD
#
DEPS_ACC_OMP = w-stacking_omp.h phase_correction.c w-stacking_omp.c
OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o
DEPS_ACC_OMP = w-stacking.h phase_correction.c w-stacking.c 
OBJ_ACC_OMP = phase_correction.o w-stacking.o 


# ----- define what files will be compiled by NVC with OMP offloading when the stacking reduce is
@@ -174,7 +177,7 @@ endif

ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT)))
CFLAGS += $(FFTW_MPI_INC)
ifeq (HIBRYD_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm
else
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm
+2 −2
Original line number Diff line number Diff line
@@ -325,7 +325,7 @@ void wstack(
    omp_set_num_threads(num_threads);
#endif

#ifdef ACCOMP
   #if defined(ACCOMP) && (GPU_STACKING)
    omp_set_default_device(rank % omp_get_num_devices());
    uint Nvis = num_points*freq_per_chan*polarizations;
   #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
@@ -420,7 +420,7 @@ void wstack(
        }
	
    }
   #ifdef ACCOMP
   #if defined(ACCOMP) && (GPU_STACKING)
   #pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan], grid[0:2*num_w_planes*grid_size_x*grid_size_y])
   #endif
    // End switch between CUDA and CPU versions