Loading Makefile +4 −5 Original line number Diff line number Diff line Loading @@ -19,8 +19,8 @@ endif LINKER=$(MPICC) FFTW_MPI_INC = -I/home/giacopo/Library_fftw/include FFTW_MPI_LIB = -L/home/giacopo/Library_fftw/lib FFTW_MPI_INC = FFTW_MPI_LIB = CFLAGS += -I./ FFTWLIBS = Loading Loading @@ -51,7 +51,7 @@ OPT += -DPHASE_ON #OPT += -DNVIDIA #use cuda for GPUs #OPT += -D__CUDACC__ #OPT += -DCUDACC # use GPU acceleration via OMP #OPT += -DACCOMP Loading Loading @@ -153,7 +153,6 @@ endif w-stacking: $(COBJ) $(DEPS) Makefile @$(LINKER) $(FLAGS) $(OPT) $(FFTWLIBS) $(LIBS) -lmpi -o $(EXEC)$(EXEC_EXT) $(COBJ): $(DEPS) Makefile %.o: %.c $(DEPS) Loading fourier_transform.c +204 −217 Original line number Diff line number Diff line Loading @@ -7,8 +7,9 @@ void fftw_data(){ #ifdef USE_FFTW // FFT transform the data (using distributed FFTW) if(rank == 0)printf("PERFORMING FFT\n"); clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); double start = CPU_TIME_wt; fftw_plan plan; fftw_complex *fftwgrid; ptrdiff_t alloc_local, local_n0, local_0_start; Loading @@ -16,7 +17,7 @@ void fftw_data(){ //Use the hybrid MPI-OpenMP FFTW #ifdef HYBRID_FFTW if (threads_ok) fftw_plan_with_nthreads(param.num_threads); fftw_plan_with_nthreads(param.num_threads); #endif // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) // x is the direction of contiguous data and maps to the second parameter Loading Loading @@ -75,20 +76,10 @@ void fftw_data(){ fftw_free(fftwgrid); #ifdef ONE_SIDE MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #else MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); timing_wt.fftw += CPU_TIME_wt - start; #endif Loading @@ -98,7 +89,7 @@ void write_fftw_data(){ #ifdef USE_FFTW #ifdef WRITE_DATA // Write results // Write results let's skip this part for the moment #ifdef USE_MPI MPI_Win writewin; MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); Loading Loading @@ -175,19 +166,15 @@ void write_fftw_data(){ // Phase correction clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); double start = CPU_TIME_wt; if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.phase_time1 = (finish.tv_sec - begin.tv_sec); timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; timing_wt.phase += CPU_TIME_wt - start; #ifdef WRITE_IMAGE Loading @@ -198,15 +185,15 @@ void write_fftw_data(){ fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); Loading @@ -224,9 +211,9 @@ void write_fftw_data(){ fclose(file.pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE Loading gridding.c +0 −1 Original line number Diff line number Diff line Loading @@ -8,7 +8,6 @@ void free_array ( uint *, uint **, int ); void initialize_array ( void ); void gridding_data ( void ); int reduce_ring ( int ); Loading gridding_std.c +1 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ #if !defined( NCCL_REDUCE ) int reduce_ring (int); // ..................................................................... // Loading init.c +3 −1 Original line number Diff line number Diff line Loading @@ -5,6 +5,8 @@ void init(int index) { double start_tot; start_tot = CPU_TIME_wt; double begin = CPU_TIME_pr; // DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization Loading Loading
Makefile +4 −5 Original line number Diff line number Diff line Loading @@ -19,8 +19,8 @@ endif LINKER=$(MPICC) FFTW_MPI_INC = -I/home/giacopo/Library_fftw/include FFTW_MPI_LIB = -L/home/giacopo/Library_fftw/lib FFTW_MPI_INC = FFTW_MPI_LIB = CFLAGS += -I./ FFTWLIBS = Loading Loading @@ -51,7 +51,7 @@ OPT += -DPHASE_ON #OPT += -DNVIDIA #use cuda for GPUs #OPT += -D__CUDACC__ #OPT += -DCUDACC # use GPU acceleration via OMP #OPT += -DACCOMP Loading Loading @@ -153,7 +153,6 @@ endif w-stacking: $(COBJ) $(DEPS) Makefile @$(LINKER) $(FLAGS) $(OPT) $(FFTWLIBS) $(LIBS) -lmpi -o $(EXEC)$(EXEC_EXT) $(COBJ): $(DEPS) Makefile %.o: %.c $(DEPS) Loading
fourier_transform.c +204 −217 Original line number Diff line number Diff line Loading @@ -7,8 +7,9 @@ void fftw_data(){ #ifdef USE_FFTW // FFT transform the data (using distributed FFTW) if(rank == 0)printf("PERFORMING FFT\n"); clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); double start = CPU_TIME_wt; fftw_plan plan; fftw_complex *fftwgrid; ptrdiff_t alloc_local, local_n0, local_0_start; Loading @@ -16,7 +17,7 @@ void fftw_data(){ //Use the hybrid MPI-OpenMP FFTW #ifdef HYBRID_FFTW if (threads_ok) fftw_plan_with_nthreads(param.num_threads); fftw_plan_with_nthreads(param.num_threads); #endif // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) // x is the direction of contiguous data and maps to the second parameter Loading Loading @@ -75,20 +76,10 @@ void fftw_data(){ fftw_free(fftwgrid); #ifdef ONE_SIDE MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #else MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); timing_wt.fftw += CPU_TIME_wt - start; #endif Loading @@ -98,7 +89,7 @@ void write_fftw_data(){ #ifdef USE_FFTW #ifdef WRITE_DATA // Write results // Write results let's skip this part for the moment #ifdef USE_MPI MPI_Win writewin; MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); Loading Loading @@ -175,19 +166,15 @@ void write_fftw_data(){ // Phase correction clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); double start = CPU_TIME_wt; if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.phase_time1 = (finish.tv_sec - begin.tv_sec); timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; timing_wt.phase += CPU_TIME_wt - start; #ifdef WRITE_IMAGE Loading @@ -198,15 +185,15 @@ void write_fftw_data(){ fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); Loading @@ -224,9 +211,9 @@ void write_fftw_data(){ fclose(file.pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE Loading
gridding.c +0 −1 Original line number Diff line number Diff line Loading @@ -8,7 +8,6 @@ void free_array ( uint *, uint **, int ); void initialize_array ( void ); void gridding_data ( void ); int reduce_ring ( int ); Loading
gridding_std.c +1 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ #if !defined( NCCL_REDUCE ) int reduce_ring (int); // ..................................................................... // Loading
init.c +3 −1 Original line number Diff line number Diff line Loading @@ -5,6 +5,8 @@ void init(int index) { double start_tot; start_tot = CPU_TIME_wt; double begin = CPU_TIME_pr; // DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization Loading