Loading Build/Makefile.systype +1 −24 Original line number Diff line number Diff line CC = gcc-10 CXX = g++-10 MPICC = mpicc MPIC++ = mpiCC OPTIMIZE = -ggdb3 -O4 -fopenmp -march=native -mavx -mavx2 GSL_INCL = GSL_LIB = FFTW_INCL= FFTW_LIB= NVCC = NVFLAGS = NVLIB = CFLAGS += MPICHLIB = HDF5INCL = HDF5LIB = SYSTYPE=local Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ SYSTYPE := $(SYSTYPE) include Build/Makefile.$(SYSTYPE) else include Build/Makefile.systype include Build/Makefile.$(SYSTYPE) endif LINKER=$(MPICC) Loading allvars.h +0 −1 Original line number Diff line number Diff line Loading @@ -40,7 +40,6 @@ #include "errcodes.h" #define PI 3.14159265359 #define NUM_OF_SECTORS -1 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) #define MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) #define NOVERBOSE Loading gridding_cpu.c +5 −7 Original line number Diff line number Diff line Loading @@ -100,7 +100,7 @@ void gridding_data() (Nvissec*2+Nweightss)*sizeof(float_t) ); if ( memory == NULL ) shutdown(NOT_ENOUGH_MEM_STACKING, "Not enough memory for stacking", __FILE__, __LINE__); shutdown_wstacking(NOT_ENOUGH_MEM_STACKING, "Not enough memory for stacking", __FILE__, __LINE__); double_t *uus = (double*) memory; double_t *vvs = (double*) uus+Nsec; Loading @@ -121,7 +121,8 @@ void gridding_data() uint inu = 0; #warning "this loop should be threaded" for(uint iphi = histo_send[isector]-1; iphi>=0; iphi--) #warning "the counter of this loop should not be int" for( int iphi = histo_send[isector]-1; iphi >=0 ; iphi--) { uint ilocal = sectorarray[isector][iphi]; Loading Loading @@ -233,7 +234,7 @@ void gridding_data() sprintf( message, "Some problem occurred in the ring reduce " "while processing sector %d", isector); free( memory ); shutdown( ERR_REDUCE, message, __FILE__, __LINE__); shutdown_wstacking( ERR_REDUCE, message, __FILE__, __LINE__); } } Loading @@ -248,9 +249,6 @@ void gridding_data() free( memory ); fclose(file.pFile1); if ( size > 1 ) { double start = CPU_TIME_wt; Loading gridding_nccl.cpp +20 −48 Original line number Diff line number Diff line Loading @@ -44,10 +44,6 @@ void gridding_data(){ double shift = (double)(dx*yaxis); #ifndef USE_MPI file.pFile1 = fopen (out.outfile1,"w"); #endif timing.kernel_time = 0.0; timing.kernel_time1 = 0.0; timing.reduce_time = 0.0; Loading Loading @@ -149,23 +145,6 @@ void gridding_data(){ timing.compose_time1 += (finishk.tv_sec - begink.tv_sec); timing.compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; #ifndef USE_MPI double vvmin = 1e20; double uumax = -1e20; double vvmax = -1e20; for (long ipart=0; ipart<Nsec; ipart++) { uumin = MIN(uumin,uus[ipart]); uumax = MAX(uumax,uus[ipart]); vvmin = MIN(vvmin,vvs[ipart]); vvmax = MAX(vvmax,vvs[ipart]); if(ipart%10 == 0)fprintf (file.pFile, "%ld %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,wws[ipart]); } printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); #endif // Make convolution on the grid Loading Loading @@ -270,17 +249,10 @@ void gridding_data(){ //Copy data back from device to host (to be deleted in next steps) #ifdef NCCL_REDUCE cudaMemcpyAsync(grid, grid_gpu, 2*param.num_w_planes*xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost, stream_reduce); #endif #ifndef USE_MPI fclose(file.pFile1); #endif #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); Loading @@ -289,15 +261,15 @@ void gridding_data(){ timing.process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); #ifdef NCCL_REDUCE cudaStreamSynchronize(stream_reduce); cudaFree(gridss_gpu); cudaFree(grid_gpu); cudaStreamDestroy(stream_reduce); ncclCommDestroy(comm); #endif } #endif Loading
Build/Makefile.systype +1 −24 Original line number Diff line number Diff line CC = gcc-10 CXX = g++-10 MPICC = mpicc MPIC++ = mpiCC OPTIMIZE = -ggdb3 -O4 -fopenmp -march=native -mavx -mavx2 GSL_INCL = GSL_LIB = FFTW_INCL= FFTW_LIB= NVCC = NVFLAGS = NVLIB = CFLAGS += MPICHLIB = HDF5INCL = HDF5LIB = SYSTYPE=local
Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ SYSTYPE := $(SYSTYPE) include Build/Makefile.$(SYSTYPE) else include Build/Makefile.systype include Build/Makefile.$(SYSTYPE) endif LINKER=$(MPICC) Loading
allvars.h +0 −1 Original line number Diff line number Diff line Loading @@ -40,7 +40,6 @@ #include "errcodes.h" #define PI 3.14159265359 #define NUM_OF_SECTORS -1 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) #define MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) #define NOVERBOSE Loading
gridding_cpu.c +5 −7 Original line number Diff line number Diff line Loading @@ -100,7 +100,7 @@ void gridding_data() (Nvissec*2+Nweightss)*sizeof(float_t) ); if ( memory == NULL ) shutdown(NOT_ENOUGH_MEM_STACKING, "Not enough memory for stacking", __FILE__, __LINE__); shutdown_wstacking(NOT_ENOUGH_MEM_STACKING, "Not enough memory for stacking", __FILE__, __LINE__); double_t *uus = (double*) memory; double_t *vvs = (double*) uus+Nsec; Loading @@ -121,7 +121,8 @@ void gridding_data() uint inu = 0; #warning "this loop should be threaded" for(uint iphi = histo_send[isector]-1; iphi>=0; iphi--) #warning "the counter of this loop should not be int" for( int iphi = histo_send[isector]-1; iphi >=0 ; iphi--) { uint ilocal = sectorarray[isector][iphi]; Loading Loading @@ -233,7 +234,7 @@ void gridding_data() sprintf( message, "Some problem occurred in the ring reduce " "while processing sector %d", isector); free( memory ); shutdown( ERR_REDUCE, message, __FILE__, __LINE__); shutdown_wstacking( ERR_REDUCE, message, __FILE__, __LINE__); } } Loading @@ -248,9 +249,6 @@ void gridding_data() free( memory ); fclose(file.pFile1); if ( size > 1 ) { double start = CPU_TIME_wt; Loading
gridding_nccl.cpp +20 −48 Original line number Diff line number Diff line Loading @@ -44,10 +44,6 @@ void gridding_data(){ double shift = (double)(dx*yaxis); #ifndef USE_MPI file.pFile1 = fopen (out.outfile1,"w"); #endif timing.kernel_time = 0.0; timing.kernel_time1 = 0.0; timing.reduce_time = 0.0; Loading Loading @@ -149,23 +145,6 @@ void gridding_data(){ timing.compose_time1 += (finishk.tv_sec - begink.tv_sec); timing.compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; #ifndef USE_MPI double vvmin = 1e20; double uumax = -1e20; double vvmax = -1e20; for (long ipart=0; ipart<Nsec; ipart++) { uumin = MIN(uumin,uus[ipart]); uumax = MAX(uumax,uus[ipart]); vvmin = MIN(vvmin,vvs[ipart]); vvmax = MAX(vvmax,vvs[ipart]); if(ipart%10 == 0)fprintf (file.pFile, "%ld %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,wws[ipart]); } printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); #endif // Make convolution on the grid Loading Loading @@ -270,17 +249,10 @@ void gridding_data(){ //Copy data back from device to host (to be deleted in next steps) #ifdef NCCL_REDUCE cudaMemcpyAsync(grid, grid_gpu, 2*param.num_w_planes*xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost, stream_reduce); #endif #ifndef USE_MPI fclose(file.pFile1); #endif #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); Loading @@ -289,15 +261,15 @@ void gridding_data(){ timing.process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); #ifdef NCCL_REDUCE cudaStreamSynchronize(stream_reduce); cudaFree(gridss_gpu); cudaFree(grid_gpu); cudaStreamDestroy(stream_reduce); ncclCommDestroy(comm); #endif } #endif