Commit eacb3f9e authored by Claudio Gheller's avatar Claudio Gheller
Browse files

bug fix in the CUDA implementation. nbucket defined to reduce the number of blocks

parent 9d47e94e
Loading
Loading
Loading
Loading
+17 −9
Original line number Diff line number Diff line
@@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis,
			double dwnorm,
			int xaxistot,
			int yaxistot,
			double resolution)
			double resolution,
			int nbucket)
{
	long gid = blockIdx.x*blockDim.x + threadIdx.x;
	double add_term_real;
	double add_term_img;
	double wterm;
	long arraysize = xaxis*yaxis*num_w_planes;
	long arraysize = (long)((xaxis*yaxis*num_w_planes)/nbucket + 1);

	if(gid < arraysize)
	{
		int iw = (int)(gid/(xaxis*yaxis));
		int iv = (int)((gid%(xaxis*yaxis))/xaxis);
		int iu = (iv%yaxis);
		long index = 2*gid;
	  long gid_aux = nbucket*gid;
	  for(int iaux=0; iaux<nbucket; iaux++) 
          {
		int iw = gid_aux/(xaxis*yaxis);
		int ivaux = gid_aux%(xaxis*yaxis);
		int iv = ivaux/xaxis;
		int iu = ivaux%xaxis;
		long index = 2*gid_aux;
		long img_index = iu+iv*xaxis;

                wterm = wmin + iw*dw;
@@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis,
		atomicAdd(&(image_real[img_index]),gridss[index]);
		atomicAdd(&(image_imag[img_index]),gridss[index+1]);
#endif // end of PHASE_ON

		gid_aux++;
           }
	}

}
@@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in

#ifdef __CUDACC__

	int nbucket = 32;
        int Nth = NTHREADS;
        long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1;
        long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1;
        if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
        printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);
	
@@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
                               dwnorm,
                               xaxistot,
                               yaxistot,
                               resolution);
                               resolution,
			       nbucket);

	mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
	mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
+4 −4
Original line number Diff line number Diff line
@@ -94,8 +94,8 @@ int main(int argc, char * argv[])
	double resolution;

        // MESH SIZE
	int grid_size_x = 256;
	int grid_size_y = 256;
	int grid_size_x = 2048;
	int grid_size_y = 2048;
	int local_grid_size_x;// = 8;
	int local_grid_size_y;// = 8;
	int xaxis;
@@ -153,9 +153,9 @@ int main(int argc, char * argv[])

	// INPUT FILES (only the first ndatasets entries are used)
	int ndatasets = 1;
        strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
        //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
        //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
        //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
        strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
        //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");

	strcpy(datapath,datapath_multi[0]);