OMP GPU acceleration basis implementation (fa112e57) · Commits · Claudio Gheller / HPC_Imaging

w-stacking_omp.c

0 → 100644

+150 −0

Original line number	Diff line number	Diff line
		#include <omp.h>
		#include "w-stacking_omp.h"
		#include <math.h>
		#include <stdlib.h>
		#include <stdio.h>

		#ifdef ACCOMP
		#pragma omp declare target
		#endif
		double gauss_kernel_norm(double norm, double std22, double u_dist, double v_dist)
		{
		double conv_weight;
		conv_weight = norm * exp(-((u_distu_dist)+(v_distv_dist))*std22);
		return conv_weight;
		}
		#ifdef ACCOMP
		#pragma omp end declare target
		#endif

		#ifdef ACCOMP
		#pragma omp declare target
		#endif
		void wstack(
		int num_w_planes,
		long num_points,
		long freq_per_chan,
		long polarizations,
		double* uu,
		double* vv,
		double* ww,
		float* vis_real,
		float* vis_img,
		float* weight,
		double dx,
		double dw,
		int w_support,
		int grid_size_x,
		int grid_size_y,
		double* grid,
		int num_threads)
		{
		long i;
		long index;
		long visindex;

		// initialize the convolution kernel
		// gaussian:
		int KernelLen = (w_support-1)/2;
		double std = 1.0;
		double std22 = 1.0/(2.0stdstd);
		double norm = std22/PI;

		// Loop over visibilities.
		#ifdef _OPENMP
		omp_set_num_threads(num_threads);
		#endif

		#ifdef ACCOMP
		long Nvis = num_pointsfreq_per_chanpolarizations;
		// #pragma omp target data map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan])
		#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2num_w_planesgrid_size_x*grid_size_y])
		#else
		#pragma omp parallel for private(visindex)
		#endif
		for (i = 0; i < num_points; i++)
		{
		#ifdef _OPENMP
		//int tid;
		//tid = omp_get_thread_num();
		//printf("%d\n",tid);
		#endif

		visindex = ifreq_per_chanpolarizations;

		double sum = 0.0;
		int j, k;
		//if (i%1000 == 0)printf("%ld\n",i);

		/* Convert UV coordinates to grid coordinates. */
		double pos_u = uu[i] / dx;
		double pos_v = vv[i] / dx;
		double ww_i = ww[i] / dw;
		int grid_w = (int)ww_i;
		int grid_u = (int)pos_u;
		int grid_v = (int)pos_v;

		// check the boundaries
		long jmin = (grid_u > KernelLen - 1) ? grid_u - KernelLen : 0;
		long jmax = (grid_u < grid_size_x - KernelLen) ? grid_u + KernelLen : grid_size_x - 1;
		long kmin = (grid_v > KernelLen - 1) ? grid_v - KernelLen : 0;
		long kmax = (grid_v < grid_size_y - KernelLen) ? grid_v + KernelLen : grid_size_y - 1;
		//printf("%d, %ld, %ld, %d, %ld, %ld\n",grid_u,jmin,jmax,grid_v,kmin,kmax);


		// Convolve this point onto the grid.
		for (k = kmin; k <= kmax; k++)
		{

		double v_dist = (double)k+0.5 - pos_v;

		for (j = jmin; j <= jmax; j++)
		{
		double u_dist = (double)j+0.5 - pos_u;
		long iKer = 2 * (j + kgrid_size_x + grid_wgrid_size_x*grid_size_y);


		double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist);
		// Loops over frequencies and polarizations
		double add_term_real = 0.0;
		double add_term_img = 0.0;
		long ifine = visindex;
		// DAV: the following two loops are performend by each thread separately: no problems of race conditions
		for (long ifreq=0; ifreq<freq_per_chan; ifreq++)
		{
		long iweight = visindex/freq_per_chan;
		for (long ipol=0; ipol<polarizations; ipol++)
		{
		if (!isnan(vis_real[ifine]))
		{
		//printf("%f %ld\n",weight[iweight],iweight);
		add_term_real += weight[iweight] * vis_real[ifine] * conv_weight;
		add_term_img += weight[iweight] * vis_img[ifine] * conv_weight;
		//if(vis_img[ifine]>1e10 \|\| vis_img[ifine]<-1e10)printf("%f %f %f %f %ld %ld\n",vis_real[ifine],vis_img[ifine],weight[iweight],conv_weight,ifine,num_pointsfreq_per_chanpolarizations);
		}
		ifine++;
		iweight++;
		}
		}
		// DAV: this is the critical call in terms of correctness of the results and of performance
		#pragma omp atomic
		grid[iKer] += add_term_real;
		#pragma omp atomic
		grid[iKer+1] += add_term_img;
		}
		}

		}
		//for (int i=0; i<100000; i++)printf("%f\n",grid[i]);
		}
		#ifdef ACCOMP
		#pragma omp end declare target
		#endif

		int test(int nnn)
		{
		int mmm;

		mmm = nnn+1;
		return mmm;
		}

w-stacking_omp.h

0 → 100644

+45 −0

Original line number	Diff line number	Diff line
		#ifndef W_PROJECT_H_
		#define W_PROJECT_H_
		#endif

		#define NWORKERS -1 //100
		#define NTHREADS 32
		#define PI 3.14159265359
		#define REAL_TYPE double

		void wstack(
		int,
		long,
		long,
		long,
		double*,
		double*,
		double*,
		float*,
		float*,
		float*,
		double,
		double,
		int,
		int,
		int,
		double*,
		int);

		int test(int nnn);

		double gauss_kernel_norm(
		double norm,
		double std22,
		double u_dist,
		double v_dist);

		void phase_correction(
		double*,
		double*,
		double*,
		int,
		int,
		int,
		int,
		int);