Commit 3835bd76 authored by Giovanni Lacopo
Browse files

Checking GPU scalability

parent 889e3c95
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -190,7 +190,6 @@ void gridding_data(){
      printf("Processing sector %ld\n",isector);
     #endif

      start = CPU_TIME_wt;
   
      double *stacking_target_array;
      if ( size > 1 )
@@ -198,6 +197,8 @@ void gridding_data(){
      else
	stacking_target_array = grid;

      start = CPU_TIME_wt;
	    
     //We have to call different GPUs per MPI task!!! [GL]
      wstack(param.num_w_planes,
	     Nsec,
+4 −2
Original line number Diff line number Diff line
@@ -234,13 +234,15 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
	}

#else
	omp_set_default_device(rank % omp_get_num_devices());
	
       #if !defined(__clang__)

       #pragma omp target teams distribute parallel for collapse(2) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #pragma omp target teams distribute parallel for collapse(2) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis])

       #else

       #pragma omp target teams distribute parallel for collapse(2) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #pragma omp target teams distribute parallel for collapse(2) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis])
       #endif
	
	for (int iw=0; iw<num_w_planes; iw++)
+9 −9
Original line number Diff line number Diff line
@@ -5,12 +5,9 @@
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include "errcodes.h"

#ifdef __CUDACC__
#include "allvars_nccl.h"
#else
#include "allvars.h"
#endif

#include "proto.h"
@@ -209,7 +206,7 @@ void wstack(
     int rank)
{
    uint i;
    uint index;
    //uint index;
    uint visindex;

    // initialize the convolution kernel
@@ -220,13 +217,14 @@ void wstack(
    double std22 = 1.0/(2.0*std*std);
    double norm = std22/PI;
    double * convkernel = (double*)malloc(increaseprecision*w_support*sizeof(*convkernel));
    double overSamplingFactor = 1.0;
    int withSinc = 0;
    double alpha = 8.6;

    #ifdef GAUSS
    makeGaussKernel(convkernel,w_support,increaseprecision,std22);
    #endif
    #ifdef KAISERBESSEL
    double overSamplingFactor = 1.0;
    int withSinc = 0;
    double alpha = 8.6;
    makeKaiserBesselKernel(convkernel, w_support, increaseprecision, alpha, overSamplingFactor, withSinc);
    #endif

@@ -328,9 +326,9 @@ void wstack(
#endif

#ifdef ACCOMP
    omp_set_default_device(rank % omp_get_num_devices());
    uint Nvis = num_points*freq_per_chan*polarizations;
  //  #pragma omp target data map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan])
  //  #pragma omp target teams distribute parallel for  map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y])
   #pragma omp target teams distribute parallel for private(visindex)  map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom: grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else
    #pragma omp parallel for private(visindex)
#endif
@@ -422,6 +420,8 @@ void wstack(
        }
	
    }

    #pragma omp target exit data map(delete:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan],grid[0:2*num_w_planes*grid_size_x*grid_size_y]) 
    // End switch between CUDA and CPU versions
#endif
    //for (int i=0; i<100000; i++)printf("%f\n",grid[i]);