Commit 821d1bd6 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

Trying to also fix the bug in the GPU reduce

parent d50b3ccc
Loading
Loading
Loading
Loading
+2 −2
Original line number | Diff line number | Diff line
@@ -58,7 +58,7 @@ OPT += -DPHASE_ON
#OPT += -DNORMALIZE_UVW

# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
OPT += -DGAUSS_HI_PRECISION
#OPT += -DGAUSS_HI_PRECISION

#OPT += -DGAUSS

@@ -76,7 +76,7 @@ OPT += -DGAUSS_HI_PRECISION
#OPT += -DCUDACC

# use GPU acceleration via OMP 
#OPT += -DACCOMP
OPT += -DACCOMP

# use NVIDIA GPU to perform the reduce
#OPT += -DNCCL_REDUCE
+15 −6
Original line number | Diff line number | Diff line
@@ -77,7 +77,8 @@ void gridding_data(){

  // allocate sector arrays
  // note: we use the largest allocation among all sectors

  //
  unsigned long long int mem_size = (Nsec*3)*sizeof(double_t) + (Nvissec*2+Nweightss)*sizeof(float_t);
  double_t *memory     = (double*) malloc ( (Nsec*3)*sizeof(double_t) +
                                            (Nvissec*2+Nweightss)*sizeof(float_t) );

@@ -128,6 +129,8 @@ void gridding_data(){

      double start = CPU_TIME_wt;

      memset( memory, 0, mem_size );
      
      // select data for this sector
      uint icount = 0;
      uint ip = 0;
@@ -197,6 +200,12 @@ void gridding_data(){

      start = CPU_TIME_wt;

      double *stacking_target_array;
      if ( size > 1 )
	stacking_target_array = gridss;
      else
	stacking_target_array = grid;
      
     //We have to call different GPUs per MPI task!!! [GL]
      wstack(param.num_w_planes,
	     Nsec,
@@ -213,7 +222,7 @@ void gridding_data(){
	     param.w_support,
	     xaxis,
	     yaxis,
	     gridss,
	     stacking_target_array,
	     param.num_threads,
	     rank);
      //Allocate memory on devices non-blocking for the host                                                                                   
@@ -246,10 +255,10 @@ void gridding_data(){
      
	  timing_wt.reduce += CPU_TIME_wt - start;

	}
		  
	  // Go to next sector
	  memset ( gridss, 0, 2*param.num_w_planes*xaxis*yaxis * sizeof(double) );	  
	}	

    }

+17 −8
Original line number | Diff line number | Diff line
@@ -75,9 +75,10 @@ void gridding_data(){

  // allocate sector arrays
  // note: we use the largest allocation among all sectors

  double_ty *memory     = (double*) malloc ( (Nsec*3)*sizeof(double_ty) +
                                            (Nvissec*2+Nweightss)*sizeof(float_ty) );
  //
  unsigned long long int mem_size = (Nsec*3)*sizeof(double_t) + (Nvissec*2+Nweightss)*sizeof(float_t);
  double_t *memory     = (double*) malloc ( (Nsec*3)*sizeof(double_t) +
                                            (Nvissec*2+Nweightss)*sizeof(float_t) );

  if ( memory == NULL )
    shutdown_wstacking(NOT_ENOUGH_MEM_STACKING, "Not enough memory for stacking", __FILE__, __LINE__);
@@ -126,6 +127,8 @@ void gridding_data(){

      double start = CPU_TIME_wt;

      memset( memory, 0, mem_size );
      
      // select data for this sector
      uint icount = 0;
      uint ip = 0;
@@ -195,6 +198,12 @@ void gridding_data(){

      start = CPU_TIME_wt;

      double *stacking_target_array;
      if ( size > 1 )
	stacking_target_array = gridss;
      else
	stacking_target_array = grid;
      
     //We have to call different GPUs per MPI task!!! [GL]
      wstack(param.num_w_planes,
	     Nsec,
@@ -211,7 +220,7 @@ void gridding_data(){
	     param.w_support,
	     xaxis,
	     yaxis,
	     gridss,
	     stacking_target_array,
	     param.num_threads,
	     rank);
      //Allocate memory on devices non-blocking for the host                                                                                   
@@ -244,10 +253,10 @@ void gridding_data(){
      
	  timing_wt.reduce += CPU_TIME_wt - start;

	}
		  
	  // Go to next sector
	  memset ( gridss, 0, 2*param.num_w_planes*xaxis*yaxis * sizeof(double) );	  
	}	

    }