Loading w-stacking_omp.c 0 → 100644 +150 −0 Original line number Diff line number Diff line #include <omp.h> #include "w-stacking_omp.h" #include <math.h> #include <stdlib.h> #include <stdio.h> #ifdef ACCOMP #pragma omp declare target #endif double gauss_kernel_norm(double norm, double std22, double u_dist, double v_dist) { double conv_weight; conv_weight = norm * exp(-((u_dist*u_dist)+(v_dist*v_dist))*std22); return conv_weight; } #ifdef ACCOMP #pragma omp end declare target #endif #ifdef ACCOMP #pragma omp declare target #endif void wstack( int num_w_planes, long num_points, long freq_per_chan, long polarizations, double* uu, double* vv, double* ww, float* vis_real, float* vis_img, float* weight, double dx, double dw, int w_support, int grid_size_x, int grid_size_y, double* grid, int num_threads) { long i; long index; long visindex; // initialize the convolution kernel // gaussian: int KernelLen = (w_support-1)/2; double std = 1.0; double std22 = 1.0/(2.0*std*std); double norm = std22/PI; // Loop over visibilities. #ifdef _OPENMP omp_set_num_threads(num_threads); #endif #ifdef ACCOMP long Nvis = num_points*freq_per_chan*polarizations; // #pragma omp target data map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) #endif for (i = 0; i < num_points; i++) { #ifdef _OPENMP //int tid; //tid = omp_get_thread_num(); //printf("%d\n",tid); #endif visindex = i*freq_per_chan*polarizations; double sum = 0.0; int j, k; //if (i%1000 == 0)printf("%ld\n",i); /* Convert UV coordinates to grid coordinates. */ double pos_u = uu[i] / dx; double pos_v = vv[i] / dx; double ww_i = ww[i] / dw; int grid_w = (int)ww_i; int grid_u = (int)pos_u; int grid_v = (int)pos_v; // check the boundaries long jmin = (grid_u > KernelLen - 1) ? grid_u - KernelLen : 0; long jmax = (grid_u < grid_size_x - KernelLen) ? grid_u + KernelLen : grid_size_x - 1; long kmin = (grid_v > KernelLen - 1) ? grid_v - KernelLen : 0; long kmax = (grid_v < grid_size_y - KernelLen) ? grid_v + KernelLen : grid_size_y - 1; //printf("%d, %ld, %ld, %d, %ld, %ld\n",grid_u,jmin,jmax,grid_v,kmin,kmax); // Convolve this point onto the grid. for (k = kmin; k <= kmax; k++) { double v_dist = (double)k+0.5 - pos_v; for (j = jmin; j <= jmax; j++) { double u_dist = (double)j+0.5 - pos_u; long iKer = 2 * (j + k*grid_size_x + grid_w*grid_size_x*grid_size_y); double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist); // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; long ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (long ifreq=0; ifreq<freq_per_chan; ifreq++) { long iweight = visindex/freq_per_chan; for (long ipol=0; ipol<polarizations; ipol++) { if (!isnan(vis_real[ifine])) { //printf("%f %ld\n",weight[iweight],iweight); add_term_real += weight[iweight] * vis_real[ifine] * conv_weight; add_term_img += weight[iweight] * vis_img[ifine] * conv_weight; //if(vis_img[ifine]>1e10 || vis_img[ifine]<-1e10)printf("%f %f %f %f %ld %ld\n",vis_real[ifine],vis_img[ifine],weight[iweight],conv_weight,ifine,num_points*freq_per_chan*polarizations); } ifine++; iweight++; } } // DAV: this is the critical call in terms of correctness of the results and of performance #pragma omp atomic grid[iKer] += add_term_real; #pragma omp atomic grid[iKer+1] += add_term_img; } } } //for (int i=0; i<100000; i++)printf("%f\n",grid[i]); } #ifdef ACCOMP #pragma omp end declare target #endif int test(int nnn) { int mmm; mmm = nnn+1; return mmm; } w-stacking_omp.h 0 → 100644 +45 −0 Original line number Diff line number Diff line #ifndef W_PROJECT_H_ #define W_PROJECT_H_ #endif #define NWORKERS -1 //100 #define NTHREADS 32 #define PI 3.14159265359 #define REAL_TYPE double void wstack( int, long, long, long, double*, double*, double*, float*, float*, float*, double, double, int, int, int, double*, int); int test(int nnn); double gauss_kernel_norm( double norm, double std22, double u_dist, double v_dist); void phase_correction( double*, double*, double*, int, int, int, int, int); Loading
w-stacking_omp.c 0 → 100644 +150 −0 Original line number Diff line number Diff line #include <omp.h> #include "w-stacking_omp.h" #include <math.h> #include <stdlib.h> #include <stdio.h> #ifdef ACCOMP #pragma omp declare target #endif double gauss_kernel_norm(double norm, double std22, double u_dist, double v_dist) { double conv_weight; conv_weight = norm * exp(-((u_dist*u_dist)+(v_dist*v_dist))*std22); return conv_weight; } #ifdef ACCOMP #pragma omp end declare target #endif #ifdef ACCOMP #pragma omp declare target #endif void wstack( int num_w_planes, long num_points, long freq_per_chan, long polarizations, double* uu, double* vv, double* ww, float* vis_real, float* vis_img, float* weight, double dx, double dw, int w_support, int grid_size_x, int grid_size_y, double* grid, int num_threads) { long i; long index; long visindex; // initialize the convolution kernel // gaussian: int KernelLen = (w_support-1)/2; double std = 1.0; double std22 = 1.0/(2.0*std*std); double norm = std22/PI; // Loop over visibilities. #ifdef _OPENMP omp_set_num_threads(num_threads); #endif #ifdef ACCOMP long Nvis = num_points*freq_per_chan*polarizations; // #pragma omp target data map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) #endif for (i = 0; i < num_points; i++) { #ifdef _OPENMP //int tid; //tid = omp_get_thread_num(); //printf("%d\n",tid); #endif visindex = i*freq_per_chan*polarizations; double sum = 0.0; int j, k; //if (i%1000 == 0)printf("%ld\n",i); /* Convert UV coordinates to grid coordinates. */ double pos_u = uu[i] / dx; double pos_v = vv[i] / dx; double ww_i = ww[i] / dw; int grid_w = (int)ww_i; int grid_u = (int)pos_u; int grid_v = (int)pos_v; // check the boundaries long jmin = (grid_u > KernelLen - 1) ? grid_u - KernelLen : 0; long jmax = (grid_u < grid_size_x - KernelLen) ? grid_u + KernelLen : grid_size_x - 1; long kmin = (grid_v > KernelLen - 1) ? grid_v - KernelLen : 0; long kmax = (grid_v < grid_size_y - KernelLen) ? grid_v + KernelLen : grid_size_y - 1; //printf("%d, %ld, %ld, %d, %ld, %ld\n",grid_u,jmin,jmax,grid_v,kmin,kmax); // Convolve this point onto the grid. for (k = kmin; k <= kmax; k++) { double v_dist = (double)k+0.5 - pos_v; for (j = jmin; j <= jmax; j++) { double u_dist = (double)j+0.5 - pos_u; long iKer = 2 * (j + k*grid_size_x + grid_w*grid_size_x*grid_size_y); double conv_weight = gauss_kernel_norm(norm,std22,u_dist,v_dist); // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; long ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (long ifreq=0; ifreq<freq_per_chan; ifreq++) { long iweight = visindex/freq_per_chan; for (long ipol=0; ipol<polarizations; ipol++) { if (!isnan(vis_real[ifine])) { //printf("%f %ld\n",weight[iweight],iweight); add_term_real += weight[iweight] * vis_real[ifine] * conv_weight; add_term_img += weight[iweight] * vis_img[ifine] * conv_weight; //if(vis_img[ifine]>1e10 || vis_img[ifine]<-1e10)printf("%f %f %f %f %ld %ld\n",vis_real[ifine],vis_img[ifine],weight[iweight],conv_weight,ifine,num_points*freq_per_chan*polarizations); } ifine++; iweight++; } } // DAV: this is the critical call in terms of correctness of the results and of performance #pragma omp atomic grid[iKer] += add_term_real; #pragma omp atomic grid[iKer+1] += add_term_img; } } } //for (int i=0; i<100000; i++)printf("%f\n",grid[i]); } #ifdef ACCOMP #pragma omp end declare target #endif int test(int nnn) { int mmm; mmm = nnn+1; return mmm; }
w-stacking_omp.h 0 → 100644 +45 −0 Original line number Diff line number Diff line #ifndef W_PROJECT_H_ #define W_PROJECT_H_ #endif #define NWORKERS -1 //100 #define NTHREADS 32 #define PI 3.14159265359 #define REAL_TYPE double void wstack( int, long, long, long, double*, double*, double*, float*, float*, float*, double, double, int, int, int, double*, int); int test(int nnn); double gauss_kernel_norm( double norm, double std22, double u_dist, double v_dist); void phase_correction( double*, double*, double*, int, int, int, int, int);