Commit 52710748 authored by Luca Tornatore's avatar Luca Tornatore
Browse files

step in polishing

parent 90e669b8
Loading
Loading
Loading
Loading
+13 −13
Original line number Diff line number Diff line
@@ -71,18 +71,18 @@ OPT += -DPHASE_ON
# ========================================================


# Prerequisite and object lists. In this span every object list appears under
# both a COBJ* and an OBJ* name.
#
# DEPS is assigned twice with `=`; the later assignment replaces the earlier
# one, so the effective value does not contain phase_correction.cu.
DEPS = w-stacking.h  main.c phase_correction.cu allvars.h
# COBJ and OBJ differ in one entry only: gridding_std.o here, gridding_cpu.o in OBJ.
COBJ = allvars.o main.o init.o gridding.o gridding_std.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o
DEPS = w-stacking.h  main.c allvars.h
OBJ = allvars.o main.o init.o gridding.o gridding_cpu.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o

# Lists for the ACC_CUDA variant; the compile_cuda rule takes the object list
# as its prerequisites.
DEPS_ACC_CUDA = w-stacking.h w-stacking.cu
COBJ_ACC_CUDA = phase_correction.o w-stacking.o
OBJ_ACC_CUDA = phase_correction.o w-stacking.o

# Lists for the ACC_OMP variant; the compile_accomp rule takes the object list
# as its prerequisites.
DEPS_ACC_OMP = w-stacking_omp.h 
COBJ_ACC_OMP = phase_correction.o w-stacking_omp.o
OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o

# Single-object list named for the NCCL reduce variant.
COBJ_NCCL_REDUCE = gridding_nccl.o
OBJ_NCCL_REDUCE = gridding_nccl.o

# Single-object list named for the RCCL reduce variant.
COBJ_RCCL_REDUCE = gridding_rccl.o
OBJ_RCCL_REDUCE = gridding_rccl.o

ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT)))
CFLAGS += $(FFTW_MPI_INC)
@@ -116,7 +116,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-cuda
LINKER=$(NVCC)
FLAGS=$(NVFLAGS) $(CFLAGS)
LIBS=$(NVLIB)
# Compile the CUDA sources. The prerequisites are the ACC_CUDA object lists,
# but the recipe does not use them: it hands every *.cu file in the current
# directory to $(NVCC) in one compile-only (-c) invocation.
compile_cuda: $(COBJ_ACC_CUDA)
compile_cuda: $(OBJ_ACC_CUDA)
	$(NVCC) $(OPT) $(NVFLAGS) -c *.cu $(NVLIB)
endif

@@ -125,7 +125,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-omp
LINKER=$(NVC)
FLAGS=$(NVFLAGS) $(CFLAGS)
LIBS=$(NVLIB)
# Compile step for the ACC_OMP variant. $^ expands to the prerequisites,
# which are the *.o names from the ACC_OMP object lists.
# NOTE(review): passing object files to a compile-only (-c) command looks
# unintended - confirm whether the matching sources were meant here.
compile_accomp: $(COBJ_ACC_OMP)
compile_accomp: $(OBJ_ACC_OMP)
	$(NVC) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB)
endif

@@ -134,7 +134,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-reduce
LINKER=$(NVC++)
FLAGS=$(NVFLAGS) $(CFLAGS)
LIBS=$(NVLIB) $(NVLIB_3)
# Compile step for the NCCL reduce variant. $^ expands to the prerequisites,
# which are the *.o names from the NCCL_REDUCE object lists.
# NOTE(review): passing object files to a compile-only (-c) command looks
# unintended - confirm whether the matching sources were meant here.
compile_accreduce: $(COBJ_NCCL_REDUCE)
compile_accreduce: $(OBJ_NCCL_REDUCE)
	$(NVC++) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB_3)
endif

@@ -143,17 +143,17 @@ EXEC_EXT := $(EXEC_EXT)_acc-reduce
LINKER=$(NVC++)
FLAGS=$(NVFLAGS) $(CFLAGS)
LIBS=$(NVLIB) $(NVLIB_3)
# Compile step for the RCCL reduce variant. The prerequisites must be spelled
# *_RCCL_REDUCE to match the list definitions; an undefined variable name
# expands to nothing, leaving the rule without prerequisites and $^ empty.
# NOTE(review): $^ is a list of *.o names handed to a compile-only (-c)
# command - confirm whether the matching sources were meant here.
compile_accreduce: $(COBJ_RCCL_REDUCE)
compile_accreduce: $(OBJ_RCCL_REDUCE)
	$(NVC++) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB_3)
endif


###################################################################################

# Link step. Prerequisites are the object list, $(DEPS) and the Makefile
# itself, so editing the Makefile forces a relink. The recipe writes
# $(EXEC)$(EXEC_EXT) rather than $@; unless that expands to "w-stacking" the
# target file never exists and the recipe runs on every invocation.
# NOTE(review): this recipe passes no object files on the link line.
w-stacking: $(COBJ) $(DEPS) Makefile
	@$(LINKER) $(FLAGS) $(OPT) $(FFTWLIBS) $(LIBS) -lmpi -o $(EXEC)$(EXEC_EXT)
# This recipe is echoed (no leading @), names $(OBJ) explicitly on the link
# line, and adds $(OPTIMIZE) and -lm.
w-stacking: $(OBJ) $(DEPS) Makefile
	$(LINKER) $(FLAGS) $(OPTIMIZE) $(OPT) $(FFTWLIBS) $(LIBS) $(OBJ) -lmpi -lm -o $(EXEC)$(EXEC_EXT)

# Prerequisite-only rules (no recipe): every listed object also depends on
# $(DEPS) and on the Makefile.
$(COBJ): $(DEPS) Makefile
$(OBJ): $(DEPS) Makefile

# Compile one C source into its object with $(MPICC): $< is the .c file,
# $@ the resulting .o.
%.o: %.c $(DEPS)
	$(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS)
+0 −0

File moved.

+1 −3
Original line number Diff line number Diff line
@@ -5,9 +5,7 @@
void init(int index)
{

  double start_tot;
  start_tot = CPU_TIME_wt;
  double begin = CPU_TIME_pr;
  double begin = CPU_TIME_wt;
  
  // DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization
  dx = 1.0/(double)param.grid_size_x;
+9 −8
Original line number Diff line number Diff line
@@ -44,6 +44,8 @@ int main(int argc, char * argv[])
 
  /* Initializing MPI Environment */

  double time_tot = CPU_TIME_wt;
  
 #ifdef _OPENMP
  {
    int thread_level;
@@ -104,6 +106,7 @@ int main(int argc, char * argv[])
    }


  
  for(int ifiles=0; ifiles<param.ndatasets; ifiles++)
    {
      if(rank == 0)
@@ -124,16 +127,14 @@ int main(int argc, char * argv[])
      /* WRITE_FFTW_DATA function */
      write_fftw_data();
                

      /* WRITE_RESULT function */
      write_result();

      
      if(rank == 0)
	printf("*************************************************************\n"); 

    }

  /* WRITE_RESULT function */
  timing_wt.total = CPU_TIME_wt - time_tot;
  write_result();
  
  shutdown(0, NULL, 0, 0);

+11 −1
Original line number Diff line number Diff line
@@ -24,6 +24,16 @@ void *win_hostmaster_ptr;

win_t      *win_ctrl;

MPI_Request *requests;
int          thid;
int          Ntasks_local;
blocks_t     blocks;
double     **swins;
int        **cwins;
int          max_level;
double      *end_4, *end_reduce;
int          dsize_4, iter; 

int numa_build_mapping( int, int, MPI_Comm *, map_t *);
int numa_map_hostnames( MPI_Comm *, int, int, map_t *);
int get_cpu_id( void );
Loading