Commit 4eb0affa authored by Emanuele De Rubeis
Browse files

Kaiser-Bessel kernel on GPU

parent 732d1d44
Loading
Loading
Loading
Loading
+38 −8
Original line number Diff line number Diff line
@@ -114,7 +114,13 @@ __global__ void convolve_g(
			   int grid_size_x,
			   int grid_size_y,
			   double* grid,
			   double std22)
			  #if defined(GAUSS_HI_PRECISION)
			   double std22
			  #else
			   double std22,
			   double* convkernel
			  #endif
			   )
			   


@@ -296,11 +302,11 @@ void wstack(
    mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float));
    mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float));
    //mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
    /*

   #if !defined(GAUSS_HI_PRECISION)
    mmm=cudaMalloc(&convkernel_g,increaseprecision*w_support*sizeof(double));
   #endif
    */

    if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);}
    //mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double));
    if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);}
@@ -313,14 +319,15 @@ void wstack(
    mmm=cudaMemcpyAsync(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);
    mmm=cudaMemcpyAsync(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice, stream_stacking);

    /*

   #if !defined(GAUSS_HI_PRECISION)
    mmm=cudaMemcpyAsync(convkernel_g, convkernel, increaseprecision*w_support*sizeof(double), cudaMemcpyHostToDevice, stream_stacking);
   #endif
    */

    if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpyAsync ERROR %d !!!\n", mmm);}
    
    // Call main GPU Kernel
   #if defined(GAUSS_HI_PRECISION)
    convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
	       num_w_planes,
               num_points,
@@ -340,6 +347,29 @@ void wstack(
               grid,
	       std22
						);
   #else
    convolve_g <<<Nbl,Nth,0,stream_stacking>>> (
	       num_w_planes,
               num_points,
               freq_per_chan,
               polarizations,
               uu_g,
               vv_g,
               ww_g,
               vis_real_g,
               vis_img_g,
               weight_g,
               dx,
               dw,
               KernelLen,
               grid_size_x,
               grid_size_y,
               grid,
	       std22,
	       convkernel_g
						);
   #endif
    
    
    mmm=cudaStreamSynchronize(stream_stacking);
    //Record the event
@@ -360,11 +390,11 @@ void wstack(
    mmm=cudaFree(vis_img_g);
    mmm=cudaFree(weight_g);
    //mmm=cudaFree(grid_g);
    /*
    
   #if !defined(GAUSS_HI_PRECISION)
    mmm=cudaFree(convkernel_g);
   #endif
    */
    
// Switch between CUDA and GPU versions
# else