Kaiser-Bessel kernel on GPU (4eb0affa) · Commits · Claudio Gheller / HPC_Imaging

w-stacking.cu

+38 −8

Original line number	Diff line number	Diff line
		@@ -114,7 +114,13 @@ __global__ void convolve_g(
		int grid_size_x,
		int grid_size_y,
		double* grid,
		double std22)
		#if defined(GAUSS_HI_PRECISION)
		double std22
		#else
		double std22,
		double* convkernel
		#endif
		)



		@@ -296,11 +302,11 @@ void wstack(
		mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float));
		mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float));
		//mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
		/*

		#if !defined(GAUSS_HI_PRECISION)
		mmm=cudaMalloc(&convkernel_g,increaseprecision*w_support*sizeof(double));
		#endif
		*/

		if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);}
		//mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
		if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);}
		@@ -313,14 +319,15 @@ void wstack(
		mmm=cudaMemcpyAsync(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);
		mmm=cudaMemcpyAsync(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);

		/*

		#if !defined(GAUSS_HI_PRECISION)
		mmm=cudaMemcpyAsync(convkernel_g, convkernel, increaseprecision*w_support*sizeof(double), cudaMemcpyHostToDevice, stream_stacking);
		#endif
		*/

		if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpyAsync ERROR %d !!!\n", mmm);}

		// Call main GPU Kernel
		#if defined(GAUSS_HI_PRECISION)
		convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
		num_w_planes,
		num_points,
		@@ -340,6 +347,29 @@ void wstack(
		grid,
		std22
		);
		#else
		convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
		num_w_planes,
		num_points,
		freq_per_chan,
		polarizations,
		uu_g,
		vv_g,
		ww_g,
		vis_real_g,
		vis_img_g,
		weight_g,
		dx,
		dw,
		KernelLen,
		grid_size_x,
		grid_size_y,
		grid,
		std22,
		convkernel_g
		);
		#endif


		mmm=cudaStreamSynchronize(stream_stacking);
		//Record the event
		@@ -360,11 +390,11 @@ void wstack(
		mmm=cudaFree(vis_img_g);
		mmm=cudaFree(weight_g);
		//mmm=cudaFree(grid_g);
		/*

		#if !defined(GAUSS_HI_PRECISION)
		mmm=cudaFree(convkernel_g);
		#endif
		*/

		// Switch between CUDA and GPU versions
		# else