Loading Makefile +3 −3 Original line number Diff line number Diff line Loading @@ -42,10 +42,10 @@ FFTWLIBS = OPT += -DUSE_FFTW # use omp-ized version of fftw routines #OPT += -DHYBRID_FFTW OPT += -DHYBRID_FFTW # switch on the OpenMP parallelization #OPT += -DUSE_OMP OPT += -DUSE_OMP # ======================================================== Loading Loading @@ -92,7 +92,7 @@ OPT += -DGAUSS_HI_PRECISION #OPT += -DCUFFTMP # FULL NVIDIA GPU SUPPORT - Recommended for full NVIDIA GPU code execution OPT += -DFULL_NVIDIA #OPT += -DFULL_NVIDIA ifeq (FULL_NVIDIA,$(findstring FULL_NVIDIA,$(OPT))) OPT += -DCUDACC -DNCCL_REDUCE -DCUFFTMP endif Loading cddeleted 100644 → 0 +0 −0 Empty file deleted. gridding_cpu.c +8 −7 Original line number Diff line number Diff line Loading @@ -67,12 +67,12 @@ void gridding_data() } // closes reduce_method == REDUCE_RING timing_wt.kernel = 0.0; timing_wt.reduce = 0.0; timing_wt.reduce_mpi = 0.0; timing_wt.reduce_sh = 0.0; timing_wt.compose = 0.0; //CLAAAA //timing_wt.kernel = 0.0; //timing_wt.reduce = 0.0; //timing_wt.reduce_mpi = 0.0; //timing_wt.reduce_sh = 0.0; //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); Loading Loading @@ -142,6 +142,7 @@ void gridding_data() double uumax = -1e20; double vvmax = -1e20; /* #pragma omp parallel reduction( min: uumin, vvmin) reduction( max: uumax, vvmax) num_threads(param.num_threads) { double my_uumin = 1e20; Loading @@ -165,7 +166,7 @@ void gridding_data() } //printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); */ timing_wt.compose += CPU_TIME_wt - start; Loading gridding_nccl.cu +6 −5 Original line number Diff line number Diff line Loading @@ -52,11 +52,12 @@ void gridding_data(){ double shift = (double)(dx*yaxis); timing_wt.kernel = 0.0; timing_wt.reduce = 0.0; timing_wt.reduce_mpi = 0.0; timing_wt.reduce_sh = 0.0; timing_wt.compose = 0.0; // CLAAAA //timing_wt.kernel = 0.0; //timing_wt.reduce = 0.0; //timing_wt.reduce_mpi = 0.0; //timing_wt.reduce_sh = 0.0; //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); Loading main.c +7 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,13 @@ int main(int argc, char * argv[]) { //CLAAAA timing_wt.kernel = 0.0; timing_wt.reduce = 0.0; timing_wt.reduce_mpi = 0.0; timing_wt.reduce_sh = 0.0; timing_wt.compose = 0.0; if(argc > 1) { strcpy(in.paramfile, argv[1]); Loading Loading
Makefile +3 −3 Original line number Diff line number Diff line Loading @@ -42,10 +42,10 @@ FFTWLIBS = OPT += -DUSE_FFTW # use omp-ized version of fftw routines #OPT += -DHYBRID_FFTW OPT += -DHYBRID_FFTW # switch on the OpenMP parallelization #OPT += -DUSE_OMP OPT += -DUSE_OMP # ======================================================== Loading Loading @@ -92,7 +92,7 @@ OPT += -DGAUSS_HI_PRECISION #OPT += -DCUFFTMP # FULL NVIDIA GPU SUPPORT - Recommended for full NVIDIA GPU code execution OPT += -DFULL_NVIDIA #OPT += -DFULL_NVIDIA ifeq (FULL_NVIDIA,$(findstring FULL_NVIDIA,$(OPT))) OPT += -DCUDACC -DNCCL_REDUCE -DCUFFTMP endif Loading
gridding_cpu.c +8 −7 Original line number Diff line number Diff line Loading @@ -67,12 +67,12 @@ void gridding_data() } // closes reduce_method == REDUCE_RING timing_wt.kernel = 0.0; timing_wt.reduce = 0.0; timing_wt.reduce_mpi = 0.0; timing_wt.reduce_sh = 0.0; timing_wt.compose = 0.0; //CLAAAA //timing_wt.kernel = 0.0; //timing_wt.reduce = 0.0; //timing_wt.reduce_mpi = 0.0; //timing_wt.reduce_sh = 0.0; //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); Loading Loading @@ -142,6 +142,7 @@ void gridding_data() double uumax = -1e20; double vvmax = -1e20; /* #pragma omp parallel reduction( min: uumin, vvmin) reduction( max: uumax, vvmax) num_threads(param.num_threads) { double my_uumin = 1e20; Loading @@ -165,7 +166,7 @@ void gridding_data() } //printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); */ timing_wt.compose += CPU_TIME_wt - start; Loading
gridding_nccl.cu +6 −5 Original line number Diff line number Diff line Loading @@ -52,11 +52,12 @@ void gridding_data(){ double shift = (double)(dx*yaxis); timing_wt.kernel = 0.0; timing_wt.reduce = 0.0; timing_wt.reduce_mpi = 0.0; timing_wt.reduce_sh = 0.0; timing_wt.compose = 0.0; // CLAAAA //timing_wt.kernel = 0.0; //timing_wt.reduce = 0.0; //timing_wt.reduce_mpi = 0.0; //timing_wt.reduce_sh = 0.0; //timing_wt.compose = 0.0; // calculate the resolution in radians resolution = 1.0/MAX(fabs(metaData.uvmin),fabs(metaData.uvmax)); Loading
main.c +7 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,13 @@ int main(int argc, char * argv[]) { //CLAAAA timing_wt.kernel = 0.0; timing_wt.reduce = 0.0; timing_wt.reduce_mpi = 0.0; timing_wt.reduce_sh = 0.0; timing_wt.compose = 0.0; if(argc > 1) { strcpy(in.paramfile, argv[1]); Loading