Loading phase_correction.cu +17 −9 Original line number Diff line number Diff line Loading @@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis, double dwnorm, int xaxistot, int yaxistot, double resolution) double resolution, int nbucket) { long gid = blockIdx.x*blockDim.x + threadIdx.x; double add_term_real; double add_term_img; double wterm; long arraysize = xaxis*yaxis*num_w_planes; long arraysize = (long)((xaxis*yaxis*num_w_planes)/nbucket + 1); if(gid < arraysize) { int iw = (int)(gid/(xaxis*yaxis)); int iv = (int)((gid%(xaxis*yaxis))/xaxis); int iu = (iv%yaxis); long index = 2*gid; long gid_aux = nbucket*gid; for(int iaux=0; iaux<nbucket; iaux++) { int iw = gid_aux/(xaxis*yaxis); int ivaux = gid_aux%(xaxis*yaxis); int iv = ivaux/xaxis; int iu = ivaux%xaxis; long index = 2*gid_aux; long img_index = iu+iv*xaxis; wterm = wmin + iw*dw; Loading Loading @@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis, atomicAdd(&(image_real[img_index]),gridss[index]); atomicAdd(&(image_imag[img_index]),gridss[index+1]); #endif // end of PHASE_ON gid_aux++; } } } Loading @@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in #ifdef __CUDACC__ int nbucket = 32; int Nth = NTHREADS; long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1; long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); Loading Loading @@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in dwnorm, xaxistot, yaxistot, resolution); resolution, nbucket); mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); Loading w-stacking-fftw.c +4 −4 Original line number Diff line number Diff line Loading @@ -94,8 +94,8 @@ int main(int argc, char * argv[]) double resolution; // MESH SIZE int grid_size_x = 256; int grid_size_y = 256; int grid_size_x = 2048; int grid_size_y = 2048; int local_grid_size_x;// = 8; int local_grid_size_y;// = 8; int xaxis; Loading Loading @@ -153,9 +153,9 @@ int main(int argc, char * argv[]) // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]); Loading Loading
phase_correction.cu +17 −9 Original line number Diff line number Diff line Loading @@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis, double dwnorm, int xaxistot, int yaxistot, double resolution) double resolution, int nbucket) { long gid = blockIdx.x*blockDim.x + threadIdx.x; double add_term_real; double add_term_img; double wterm; long arraysize = xaxis*yaxis*num_w_planes; long arraysize = (long)((xaxis*yaxis*num_w_planes)/nbucket + 1); if(gid < arraysize) { int iw = (int)(gid/(xaxis*yaxis)); int iv = (int)((gid%(xaxis*yaxis))/xaxis); int iu = (iv%yaxis); long index = 2*gid; long gid_aux = nbucket*gid; for(int iaux=0; iaux<nbucket; iaux++) { int iw = gid_aux/(xaxis*yaxis); int ivaux = gid_aux%(xaxis*yaxis); int iv = ivaux/xaxis; int iu = ivaux%xaxis; long index = 2*gid_aux; long img_index = iu+iv*xaxis; wterm = wmin + iw*dw; Loading Loading @@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis, atomicAdd(&(image_real[img_index]),gridss[index]); atomicAdd(&(image_imag[img_index]),gridss[index+1]); #endif // end of PHASE_ON gid_aux++; } } } Loading @@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in #ifdef __CUDACC__ int nbucket = 32; int Nth = NTHREADS; long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1; long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); Loading Loading @@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in dwnorm, xaxistot, yaxistot, resolution); resolution, nbucket); mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); Loading
w-stacking-fftw.c +4 −4 Original line number Diff line number Diff line Loading @@ -94,8 +94,8 @@ int main(int argc, char * argv[]) double resolution; // MESH SIZE int grid_size_x = 256; int grid_size_y = 256; int grid_size_x = 2048; int grid_size_y = 2048; int local_grid_size_x;// = 8; int local_grid_size_y;// = 8; int xaxis; Loading Loading @@ -153,9 +153,9 @@ int main(int argc, char * argv[]) // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]); Loading