Loading w-stacking.cu +38 −8 Original line number Diff line number Diff line Loading @@ -114,7 +114,13 @@ __global__ void convolve_g( int grid_size_x, int grid_size_y, double* grid, double std22) #if defined(GAUSS_HI_PRECISION) double std22 #else double std22, double* convkernel #endif ) Loading Loading @@ -296,11 +302,11 @@ void wstack( mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float)); mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float)); //mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); /* #if !defined(GAUSS_HI_PRECISION) mmm=cudaMalloc(&convkernel_g,increaseprecision*w_support*sizeof(double)); #endif */ if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);} //mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);} Loading @@ -313,14 +319,15 @@ void wstack( mmm=cudaMemcpyAsync(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice, stream_stacking); mmm=cudaMemcpyAsync(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice, stream_stacking); /* #if !defined(GAUSS_HI_PRECISION) mmm=cudaMemcpyAsync(convkernel_g, convkernel, increaseprecision*w_support*sizeof(double), cudaMemcpyHostToDevice, stream_stacking); #endif */ if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpyAsync ERROR %d !!!\n", mmm);} // Call main GPU Kernel #if defined(GAUSS_HI_PRECISION) convolve_g <<<Nbl,Nth,0,stream_stacking>>> ( num_w_planes, num_points, Loading @@ -340,6 +347,29 @@ void wstack( grid, std22 ); #else convolve_g <<<Nbl,Nth,0,stream_stacking>>> ( num_w_planes, num_points, freq_per_chan, polarizations, uu_g, vv_g, ww_g, vis_real_g, vis_img_g, weight_g, dx, dw, KernelLen, grid_size_x, grid_size_y, grid, std22, convkernel_g ); #endif mmm=cudaStreamSynchronize(stream_stacking); //Record the event Loading @@ -360,11 +390,11 @@ void wstack( mmm=cudaFree(vis_img_g); mmm=cudaFree(weight_g); //mmm=cudaFree(grid_g); /* #if !defined(GAUSS_HI_PRECISION) mmm=cudaFree(convkernel_g); #endif */ // Switch between CUDA and GPU versions # else Loading Loading
w-stacking.cu +38 −8 Original line number Diff line number Diff line Loading @@ -114,7 +114,13 @@ __global__ void convolve_g( int grid_size_x, int grid_size_y, double* grid, double std22) #if defined(GAUSS_HI_PRECISION) double std22 #else double std22, double* convkernel #endif ) Loading Loading @@ -296,11 +302,11 @@ void wstack( mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float)); mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float)); //mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); /* #if !defined(GAUSS_HI_PRECISION) mmm=cudaMalloc(&convkernel_g,increaseprecision*w_support*sizeof(double)); #endif */ if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);} //mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);} Loading @@ -313,14 +319,15 @@ void wstack( mmm=cudaMemcpyAsync(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice, stream_stacking); mmm=cudaMemcpyAsync(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice, stream_stacking); /* #if !defined(GAUSS_HI_PRECISION) mmm=cudaMemcpyAsync(convkernel_g, convkernel, increaseprecision*w_support*sizeof(double), cudaMemcpyHostToDevice, stream_stacking); #endif */ if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpyAsync ERROR %d !!!\n", mmm);} // Call main GPU Kernel #if defined(GAUSS_HI_PRECISION) convolve_g <<<Nbl,Nth,0,stream_stacking>>> ( num_w_planes, num_points, Loading @@ -340,6 +347,29 @@ void wstack( grid, std22 ); #else convolve_g <<<Nbl,Nth,0,stream_stacking>>> ( num_w_planes, num_points, freq_per_chan, polarizations, uu_g, vv_g, ww_g, vis_real_g, vis_img_g, weight_g, dx, dw, KernelLen, grid_size_x, grid_size_y, grid, std22, convkernel_g ); #endif mmm=cudaStreamSynchronize(stream_stacking); //Record the event Loading @@ -360,11 +390,11 @@ void wstack( mmm=cudaFree(vis_img_g); mmm=cudaFree(weight_g); //mmm=cudaFree(grid_g); /* #if !defined(GAUSS_HI_PRECISION) mmm=cudaFree(convkernel_g); #endif */ // Switch between CUDA and GPU versions # else Loading