Loading gridding_cpu.c +6 −8 Original line number Diff line number Diff line Loading @@ -142,7 +142,6 @@ void gridding_data() icount++; } double uumin = 1e20; double vvmin = 1e20; double uumax = -1e20; Loading Loading @@ -170,10 +169,11 @@ void gridding_data() vvmax = MAX( vvmax, my_vvmax ); } timing_wt.compose += CPU_TIME_wt - start; //printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); timing_wt.compose += CPU_TIME_wt - start; // Make convolution on the grid #ifdef VERBOSE Loading Loading @@ -209,14 +209,14 @@ void gridding_data() printf("Processed sector %ld\n",isector); #endif start = CPU_TIME_wt; if( size > 1 ) { // Write grid in the corresponding remote slab int target_rank = (int)(isector % size); start = CPU_TIME_wt; if( param.reduce_method == REDUCE_MPI ) MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MYMPI_COMM_WORLD); Loading @@ -241,8 +241,6 @@ void gridding_data() timing_wt.reduce += CPU_TIME_wt - start; } else *grid += *gridss; // Go to next sector memset ( gridss, 0, 2*param.num_w_planes*xaxis*yaxis * sizeof(double) ); Loading gridding_nccl.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -229,8 +229,6 @@ void gridding_data(){ #endif start = CPU_TIME_wt; if( size > 1 ) { Loading @@ -239,6 +237,8 @@ void gridding_data(){ // int target_rank = (int)isector; it implied that size >= nsectors int target_rank = (int)(isector % size); start = CPU_TIME_wt; cudaStreamSynchronize(stream_reduce); ncclReduce(gridss_gpu, grid_gpu, size_of_grid, ncclDouble, ncclSum, target_rank, comm, stream_reduce); Loading gridding_rccl.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -227,8 +227,6 @@ void gridding_data(){ #endif start = CPU_TIME_wt; if( size > 1 ) { Loading @@ -237,6 +235,8 @@ void gridding_data(){ // int target_rank = (int)isector; it implied that size >= nsectors int target_rank = (int)(isector % size); start = CPU_TIME_wt; hipStreamSynchronize(stream_reduce); ncclReduce(gridss_gpu, grid_gpu, size_of_grid, ncclDouble, ncclSum, target_rank, comm, stream_reduce); Loading Loading
gridding_cpu.c +6 −8 Original line number Diff line number Diff line Loading @@ -142,7 +142,6 @@ void gridding_data() icount++; } double uumin = 1e20; double vvmin = 1e20; double uumax = -1e20; Loading Loading @@ -170,10 +169,11 @@ void gridding_data() vvmax = MAX( vvmax, my_vvmax ); } timing_wt.compose += CPU_TIME_wt - start; //printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); timing_wt.compose += CPU_TIME_wt - start; // Make convolution on the grid #ifdef VERBOSE Loading Loading @@ -209,14 +209,14 @@ void gridding_data() printf("Processed sector %ld\n",isector); #endif start = CPU_TIME_wt; if( size > 1 ) { // Write grid in the corresponding remote slab int target_rank = (int)(isector % size); start = CPU_TIME_wt; if( param.reduce_method == REDUCE_MPI ) MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MYMPI_COMM_WORLD); Loading @@ -241,8 +241,6 @@ void gridding_data() timing_wt.reduce += CPU_TIME_wt - start; } else *grid += *gridss; // Go to next sector memset ( gridss, 0, 2*param.num_w_planes*xaxis*yaxis * sizeof(double) ); Loading
gridding_nccl.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -229,8 +229,6 @@ void gridding_data(){ #endif start = CPU_TIME_wt; if( size > 1 ) { Loading @@ -239,6 +237,8 @@ void gridding_data(){ // int target_rank = (int)isector; it implied that size >= nsectors int target_rank = (int)(isector % size); start = CPU_TIME_wt; cudaStreamSynchronize(stream_reduce); ncclReduce(gridss_gpu, grid_gpu, size_of_grid, ncclDouble, ncclSum, target_rank, comm, stream_reduce); Loading
gridding_rccl.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -227,8 +227,6 @@ void gridding_data(){ #endif start = CPU_TIME_wt; if( size > 1 ) { Loading @@ -237,6 +235,8 @@ void gridding_data(){ // int target_rank = (int)isector; it implied that size >= nsectors int target_rank = (int)(isector % size); start = CPU_TIME_wt; hipStreamSynchronize(stream_reduce); ncclReduce(gridss_gpu, grid_gpu, size_of_grid, ncclDouble, ncclSum, target_rank, comm, stream_reduce); Loading