bug fix in the CUDA implementation. nbucket defined to reduce the number of blocks (eacb3f9e) · Commits · Claudio Gheller / HPC_Imaging

phase_correction.cu

+17 −9

Original line number	Diff line number	Diff line
		@@ -19,20 +19,25 @@ __global__ void phase_g(int xaxis,
		double dwnorm,
		int xaxistot,
		int yaxistot,
		double resolution)
		double resolution,
		int nbucket)
		{
		long gid = blockIdx.x*blockDim.x + threadIdx.x;
		double add_term_real;
		double add_term_img;
		double wterm;
		long arraysize = xaxisyaxisnum_w_planes;
		long arraysize = (long)((xaxisyaxisnum_w_planes)/nbucket + 1);

		if(gid < arraysize)
		{
		int iw = (int)(gid/(xaxis*yaxis));
		int iv = (int)((gid%(xaxis*yaxis))/xaxis);
		int iu = (iv%yaxis);
		long index = 2*gid;
		long gid_aux = nbucket*gid;
		for(int iaux=0; iaux<nbucket; iaux++)
		{
		int iw = gid_aux/(xaxis*yaxis);
		int ivaux = gid_aux%(xaxis*yaxis);
		int iv = ivaux/xaxis;
		int iu = ivaux%xaxis;
		long index = 2*gid_aux;
		long img_index = iu+iv*xaxis;

		wterm = wmin + iw*dw;
		@@ -73,7 +78,8 @@ __global__ void phase_g(int xaxis,
		atomicAdd(&(image_real[img_index]),gridss[index]);
		atomicAdd(&(image_imag[img_index]),gridss[index+1]);
		#endif // end of PHASE_ON

		gid_aux++;
		}
		}

		}
		@@ -89,8 +95,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in

		#ifdef __CUDACC__

		int nbucket = 32;
		int Nth = NTHREADS;
		long Nbl = (long)((num_w_planesxaxisyaxis)/Nth) + 1;
		long Nbl = (long)((num_w_planesxaxisyaxis)/Nth/nbucket) + 1;
		if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
		printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);

		@@ -120,7 +127,8 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
		dwnorm,
		xaxistot,
		yaxistot,
		resolution);
		resolution,
		nbucket);

		mmm = cudaMemcpy(image_real, image_real_g, xaxisyaxissizeof(double), cudaMemcpyDeviceToHost);
		mmm = cudaMemcpy(image_imag, image_imag_g, xaxisyaxissizeof(double), cudaMemcpyDeviceToHost);

w-stacking-fftw.c

+4 −4

Original line number	Diff line number	Diff line
		@@ -94,8 +94,8 @@ int main(int argc, char * argv[])
		double resolution;

		// MESH SIZE
		int grid_size_x = 256;
		int grid_size_y = 256;
		int grid_size_x = 2048;
		int grid_size_y = 2048;
		int local_grid_size_x;// = 8;
		int local_grid_size_y;// = 8;
		int xaxis;
		@@ -153,9 +153,9 @@ int main(int argc, char * argv[])

		// INPUT FILES (only the first ndatasets entries are used)
		int ndatasets = 1;
		strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
		//strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/");
		//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/");
		//strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
		strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/");
		//strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/");

		strcpy(datapath,datapath_multi[0]);