Commit fa112e57 authored by Giuliano Taffoni's avatar Giuliano Taffoni
Browse files

OMP GPU acceleration basis implementation

parent 997121ca
Loading
Loading
Loading
Loading

w-stacking_omp.c

0 → 100644
+150 −0
Original line number Diff line number Diff line
#include <omp.h>
#include "w-stacking_omp.h"
#include <math.h>
#include <stdlib.h>
#include <stdio.h>

#ifdef ACCOMP
#pragma omp  declare target
#endif
double gauss_kernel_norm(double norm, double std22, double u_dist, double v_dist)
{
     double conv_weight;
     conv_weight = norm * exp(-((u_dist*u_dist)+(v_dist*v_dist))*std22);
     return conv_weight;
}
#ifdef ACCOMP
#pragma omp end declare target
#endif

#ifdef ACCOMP
#pragma  omp declare target
#endif
void wstack(
     int num_w_planes,
     long num_points,
     long freq_per_chan,
     long polarizations,
     double* uu,
     double* vv,
     double* ww,
     float* vis_real,
     float* vis_img,
     float* weight,
     double dx,
     double dw,
     int w_support,
     int grid_size_x,
     int grid_size_y,
     double* grid,
     int num_threads)
{
    long i;
    long index;
    long visindex;

    // initialize the convolution kernel
    // gaussian:
    int KernelLen = (w_support-1)/2;
    double std = 1.0;
    double std22 = 1.0/(2.0*std*std);
    double norm = std22/PI;

    // Loop over visibilities.
#ifdef _OPENMP
    omp_set_num_threads(num_threads);
#endif

#ifdef ACCOMP
    long Nvis = num_points*freq_per_chan*polarizations;
  //  #pragma omp target data map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan])
    #pragma omp target teams distribute parallel for private(visindex)  map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else
    #pragma omp parallel for private(visindex)
#endif
    for (i = 0; i < num_points; i++)
    {
#ifdef _OPENMP
	//int tid;
	//tid = omp_get_thread_num();
	//printf("%d\n",tid);
#endif

        visindex = i*freq_per_chan*polarizations;

        double sum = 0.0;
        int j, k;
	//if (i%1000 == 0)printf("%ld\n",i);

        /* Convert UV coordinates to grid coordinates. */
        double pos_u = uu[i] / dx;
        double pos_v = vv[i] / dx;
        double ww_i  = ww[i] / dw;
        int grid_w = (int)ww_i;
        int grid_u = (int)pos_u;
        int grid_v = (int)pos_v;

	// check the boundaries
	long jmin = (grid_u > KernelLen - 1) ? grid_u - KernelLen : 0;
	long jmax = (grid_u < grid_size_x - KernelLen) ? grid_u + KernelLen : grid_size_x - 1;
	long kmin = (grid_v > KernelLen - 1) ? grid_v - KernelLen : 0;
	long kmax = (grid_v < grid_size_y - KernelLen) ? grid_v + KernelLen : grid_size_y - 1;
        //printf("%d, %ld, %ld, %d, %ld, %ld\n",grid_u,jmin,jmax,grid_v,kmin,kmax);


        // Convolve this point onto the grid.
        for (k = kmin; k <= kmax; k++)
        {

            double v_dist = (double)k+0.5 - pos_v;

            for (j = jmin; j <= jmax; j++)
            {
                double u_dist = (double)j+0.5 - pos_u;
		long iKer = 2 * (j + k*grid_size_x + grid_w*grid_size_x*grid_size_y);


		double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist);
		// Loops over frequencies and polarizations
		double add_term_real = 0.0;
		double add_term_img = 0.0;
		long ifine = visindex;
		// DAV: the following two loops are performend by each thread separately: no problems of race conditions
		for (long ifreq=0; ifreq<freq_per_chan; ifreq++)
		{
		   long iweight = visindex/freq_per_chan;
	           for (long ipol=0; ipol<polarizations; ipol++)
	           {
                      if (!isnan(vis_real[ifine]))
                      {
		         //printf("%f %ld\n",weight[iweight],iweight);
                         add_term_real += weight[iweight] * vis_real[ifine] * conv_weight;
		         add_term_img += weight[iweight] * vis_img[ifine] * conv_weight;
			 //if(vis_img[ifine]>1e10 || vis_img[ifine]<-1e10)printf("%f %f %f %f %ld %ld\n",vis_real[ifine],vis_img[ifine],weight[iweight],conv_weight,ifine,num_points*freq_per_chan*polarizations);
		      }
		      ifine++;
		      iweight++;
		   }
	        }
		// DAV: this is the critical call in terms of correctness of the results and of performance
		#pragma omp atomic
		grid[iKer] += add_term_real;
		#pragma omp atomic
		grid[iKer+1] += add_term_img;
            }
        }

    }
    //for (int i=0; i<100000; i++)printf("%f\n",grid[i]);
}
#ifdef ACCOMP
#pragma  omp end declare target
#endif

int test(int nnn)
{
	int mmm;

	mmm = nnn+1;
	return mmm;
}

w-stacking_omp.h

0 → 100644
+45 −0
Original line number Diff line number Diff line
#ifndef W_PROJECT_H_
#define W_PROJECT_H_
#endif

#define NWORKERS -1    //100
#define NTHREADS 32
#define PI 3.14159265359
#define REAL_TYPE double

void wstack(
     int,
     long,
     long,
     long,
     double*,
     double*,
     double*,
     float*,
     float*,
     float*,
     double,
     double,
     int,
     int,
     int,
     double*,
     int);

int test(int nnn);

double gauss_kernel_norm(
  double norm,
  double std22,
  double u_dist,
  double v_dist);

void phase_correction(
     double*,
     double*,
     double*,
     int,
     int,
     int,
     int,
     int);