Commit cd33eaa6 authored by Emanuele De Rubeis
Browse files

Fixed data types and print intermediate data (AMD & CUDA version)

parent a2a03413
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -121,7 +121,7 @@ extern struct meta
{

  myuint   Nmeasures;
  myuint   Nvis;
  myull   Nvis;
  myuint   Nweights;
  myuint   freq_per_chan;
  myuint   polarisations;
+14 −14
Original line number Diff line number Diff line
@@ -71,8 +71,8 @@ typedef double float_t;
typedef float float_t;
#endif

typedef unsigned int       uint;
typedef unsigned long long ull;
typedef unsigned int       myuint;
typedef unsigned long long myull;


extern struct io
@@ -115,15 +115,15 @@ extern struct op
extern struct meta
{

  uint   Nmeasures;
  uint   Nvis;
  uint   Nweights;
  uint   freq_per_chan;
  uint   polarisations;
  uint   Ntimes;
  myuint   Nmeasures;
  myull   Nvis;
  myuint   Nweights;
  myuint   freq_per_chan;
  myuint   polarisations;
  myuint   Ntimes;
  double dt;
  double thours;
  uint   baselines;
  myuint   baselines;
  double uvmin;
  double uvmax;
  double wmin;
@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN];
extern int  xaxis, yaxis;
extern int  rank;
extern int  size;
extern uint nsectors;
extern uint startrow;
extern myuint nsectors;
extern myuint startrow;
extern double_t resolution, dx, dw, w_supporth;

extern uint **sectorarray;
extern uint  *histo_send;
extern myuint **sectorarray;
extern myuint  *histo_send;
extern int    verbose_level; 


extern uint    size_of_grid;
extern myuint    size_of_grid;
extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu;

extern MPI_Comm MYMPI_COMM_WORLD;
+6 −6
Original line number Diff line number Diff line
@@ -134,7 +134,7 @@ __global__ void convolve_g(
  if(gid < num_points)
    {
      myuint i = gid;
      unsigned long visindex = i*freq_per_chan*polarizations;
      myull visindex = i*freq_per_chan*polarizations;
      double norm = std22/PI;

      int j, k;
@@ -182,7 +182,7 @@ __global__ void convolve_g(
	      // Loops over frequencies and polarizations
	      double add_term_real = 0.0;
	      double add_term_img = 0.0;
	      unsigned long ifine = visindex;
	      myull ifine = visindex;
	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{
		  myuint iweight = visindex/freq_per_chan;
@@ -245,7 +245,7 @@ void wstack(
{
  myuint i;
  //myuint index;
  unsigned long visindex;
  myull visindex;
  
  // initialize the convolution kernel
  // gaussian:
@@ -274,7 +274,7 @@ void wstack(
  int Nth = NTHREADS;
  myuint Nbl = (myuint)(num_points/Nth) + 1;
  if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
  unsigned long Nvis = num_points*freq_per_chan*polarizations;
  myull Nvis = num_points*freq_per_chan*polarizations;
  
  int ndevices;
  cudaGetDeviceCount(&ndevices);
@@ -441,7 +441,7 @@ void wstack(

#if defined(ACCOMP) && (GPU_STACKING)
  omp_set_default_device(rank % omp_get_num_devices());
  myuint Nvis = num_points*freq_per_chan*polarizations;
  myull Nvis = num_points*freq_per_chan*polarizations;
#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else
#pragma omp parallel for private(visindex)
@@ -507,7 +507,7 @@ void wstack(
	      // Loops over frequencies and polarizations
	      double add_term_real = 0.0;
	      double add_term_img = 0.0;
	      unsigned long ifine = visindex;
	      myull ifine = visindex;
	      // DAV: the following two loops are performed by each thread separately: no problems of race conditions
	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{
+6 −6
Original line number Diff line number Diff line
@@ -130,7 +130,7 @@ __global__ void convolve_g(
  if(gid < num_points)
    {
      myuint i = gid;
      myuint visindex = i*freq_per_chan*polarizations;
      myull visindex = i*freq_per_chan*polarizations;
      double norm = std22/PI;

      int j, k;
@@ -178,7 +178,7 @@ __global__ void convolve_g(
	      // Loops over frequencies and polarizations
	      double add_term_real = 0.0;
	      double add_term_img = 0.0;
	      myuint ifine = visindex;
	      myull ifine = visindex;
	      for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{
		  myuint iweight = visindex/freq_per_chan;
@@ -233,7 +233,7 @@ void wstack(
{
    myuint i;
    //myuint index;
    myuint visindex;
    myull visindex;

    // initialize the convolution kernel
    // gaussian:
@@ -262,7 +262,7 @@ void wstack(
    int Nth = NTHREADS;
    myuint Nbl = (myuint)(num_points/Nth) + 1;
    if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
    myuint Nvis = num_points*freq_per_chan*polarizations;
    myull Nvis = num_points*freq_per_chan*polarizations;

    int ndevices;
    int num = hipGetDeviceCount(&ndevices);
@@ -404,7 +404,7 @@ void wstack(

   #if defined(ACCOMP) && (GPU_STACKING)
    omp_set_default_device(rank % omp_get_num_devices());
    myuint Nvis = num_points*freq_per_chan*polarizations;
    myull Nvis = num_points*freq_per_chan*polarizations;
   #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
#else
    #pragma omp parallel for private(visindex)
@@ -470,7 +470,7 @@ void wstack(
		// Loops over frequencies and polarizations
		double add_term_real = 0.0;
		double add_term_img = 0.0;
		myuint ifine = visindex;
		myull ifine = visindex;
		// DAV: the following two loops are performed by each thread separately: no problems of race conditions
		for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{