Loading Makefile +13 −13 Original line number Diff line number Diff line Loading @@ -71,18 +71,18 @@ OPT += -DPHASE_ON # ======================================================== DEPS = w-stacking.h main.c phase_correction.cu allvars.h COBJ = allvars.o main.o init.o gridding.o gridding_std.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o DEPS = w-stacking.h main.c allvars.h OBJ = allvars.o main.o init.o gridding.o gridding_cpu.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o DEPS_ACC_CUDA = w-stacking.h w-stacking.cu COBJ_ACC_CUDA = phase_correction.o w-stacking.o OBJ_ACC_CUDA = phase_correction.o w-stacking.o DEPS_ACC_OMP = w-stacking_omp.h COBJ_ACC_OMP = phase_correction.o w-stacking_omp.o OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o COBJ_NCCL_REDUCE = gridding_nccl.o OBJ_NCCL_REDUCE = gridding_nccl.o COBJ_RCCL_REDUCE = gridding_rccl.o OBJ_RCCL_REDUCE = gridding_rccl.o ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT))) CFLAGS += $(FFTW_MPI_INC) Loading Loading @@ -116,7 +116,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-cuda LINKER=$(NVCC) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) compile_cuda: $(COBJ_ACC_CUDA) compile_cuda: $(OBJ_ACC_CUDA) $(NVCC) $(OPT) $(NVFLAGS) -c *.cu $(NVLIB) endif Loading @@ -125,7 +125,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-omp LINKER=$(NVC) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) compile_accomp: $(COBJ_ACC_OMP) compile_accomp: $(OBJ_ACC_OMP) $(NVC) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB) endif Loading @@ -134,7 +134,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-reduce LINKER=$(NVC++) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) $(NVLIB_3) compile_accreduce: $(COBJ_NCCL_REDUCE) compile_accreduce: $(OBJ_NCCL_REDUCE) $(NVC++) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB_3) endif Loading @@ -143,17 +143,17 @@ EXEC_EXT := $(EXEC_EXT)_acc-reduce LINKER=$(NVC++) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) $(NVLIB_3) compile_accreduce: $(COBJ_RCCL_REDCUE) compile_accreduce: $(OBJ_RCCL_REDCUE) $(NVC++) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB_3) endif ################################################################################### w-stacking: $(COBJ) $(DEPS) Makefile @$(LINKER) $(FLAGS) $(OPT) $(FFTWLIBS) $(LIBS) -lmpi -o $(EXEC)$(EXEC_EXT) w-stacking: $(OBJ) $(DEPS) Makefile $(LINKER) $(FLAGS) $(OPTIMIZE) $(OPT) $(FFTWLIBS) $(LIBS) $(OBJ) -lmpi -lm -o $(EXEC)$(EXEC_EXT) $(COBJ): $(DEPS) Makefile $(OBJ): $(DEPS) Makefile %.o: %.c $(DEPS) $(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS) Loading gridding_std.c→gridding_cpu.c +0 −0 File moved. View file init.c +1 −3 Original line number Diff line number Diff line Loading @@ -5,9 +5,7 @@ void init(int index) { double start_tot; start_tot = CPU_TIME_wt; double begin = CPU_TIME_pr; double begin = CPU_TIME_wt; // DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization dx = 1.0/(double)param.grid_size_x; Loading main.c +9 −8 Original line number Diff line number Diff line Loading @@ -44,6 +44,8 @@ int main(int argc, char * argv[]) /* Initializing MPI Environment */ double time_tot = CPU_TIME_wt; #ifdef _OPENMP { int thread_level; Loading Loading @@ -104,6 +106,7 @@ int main(int argc, char * argv[]) } for(int ifiles=0; ifiles<param.ndatasets; ifiles++) { if(rank == 0) Loading @@ -124,16 +127,14 @@ int main(int argc, char * argv[]) /* WRITE_FFTW_DATA function */ write_fftw_data(); /* WRITE_RESULT function */ write_result(); if(rank == 0) printf("*************************************************************\n"); } /* WRITE_RESULT function */ timing_wt.total = CPU_TIME_wt - time_tot; write_result(); shutdown(0, NULL, 0, 0); Loading numa.c +11 −1 Original line number Diff line number Diff line Loading @@ -24,6 +24,16 @@ void *win_hostmaster_ptr; win_t *win_ctrl; MPI_Request *requests; int thid; int Ntasks_local; blocks_t blocks; double **swins; int **cwins; int max_level; double *end_4, *end_reduce; int dsize_4, iter; int numa_build_mapping( int, int, MPI_Comm *, map_t *); int numa_map_hostnames( MPI_Comm *, int, int, map_t *); int get_cpu_id( void ); Loading Loading
Makefile +13 −13 Original line number Diff line number Diff line Loading @@ -71,18 +71,18 @@ OPT += -DPHASE_ON # ======================================================== DEPS = w-stacking.h main.c phase_correction.cu allvars.h COBJ = allvars.o main.o init.o gridding.o gridding_std.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o DEPS = w-stacking.h main.c allvars.h OBJ = allvars.o main.o init.o gridding.o gridding_cpu.o fourier_transform.o result.o numa.o reduce.o w-stacking.o phase_correction.o DEPS_ACC_CUDA = w-stacking.h w-stacking.cu COBJ_ACC_CUDA = phase_correction.o w-stacking.o OBJ_ACC_CUDA = phase_correction.o w-stacking.o DEPS_ACC_OMP = w-stacking_omp.h COBJ_ACC_OMP = phase_correction.o w-stacking_omp.o OBJ_ACC_OMP = phase_correction.o w-stacking_omp.o COBJ_NCCL_REDUCE = gridding_nccl.o OBJ_NCCL_REDUCE = gridding_nccl.o COBJ_RCCL_REDUCE = gridding_rccl.o OBJ_RCCL_REDUCE = gridding_rccl.o ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT))) CFLAGS += $(FFTW_MPI_INC) Loading Loading @@ -116,7 +116,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-cuda LINKER=$(NVCC) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) compile_cuda: $(COBJ_ACC_CUDA) compile_cuda: $(OBJ_ACC_CUDA) $(NVCC) $(OPT) $(NVFLAGS) -c *.cu $(NVLIB) endif Loading @@ -125,7 +125,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-omp LINKER=$(NVC) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) compile_accomp: $(COBJ_ACC_OMP) compile_accomp: $(OBJ_ACC_OMP) $(NVC) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB) endif Loading @@ -134,7 +134,7 @@ EXEC_EXT := $(EXEC_EXT)_acc-reduce LINKER=$(NVC++) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) $(NVLIB_3) compile_accreduce: $(COBJ_NCCL_REDUCE) compile_accreduce: $(OBJ_NCCL_REDUCE) $(NVC++) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB_3) endif Loading @@ -143,17 +143,17 @@ EXEC_EXT := $(EXEC_EXT)_acc-reduce LINKER=$(NVC++) FLAGS=$(NVFLAGS) $(CFLAGS) LIBS=$(NVLIB) $(NVLIB_3) compile_accreduce: $(COBJ_RCCL_REDCUE) compile_accreduce: $(OBJ_RCCL_REDCUE) $(NVC++) $(NVFLAGS) $(OPT) -c $^ $(CFLAGS) $(NVLIB_3) endif ################################################################################### w-stacking: $(COBJ) $(DEPS) Makefile @$(LINKER) $(FLAGS) $(OPT) $(FFTWLIBS) $(LIBS) -lmpi -o $(EXEC)$(EXEC_EXT) w-stacking: $(OBJ) $(DEPS) Makefile $(LINKER) $(FLAGS) $(OPTIMIZE) $(OPT) $(FFTWLIBS) $(LIBS) $(OBJ) -lmpi -lm -o $(EXEC)$(EXEC_EXT) $(COBJ): $(DEPS) Makefile $(OBJ): $(DEPS) Makefile %.o: %.c $(DEPS) $(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS) Loading
init.c +1 −3 Original line number Diff line number Diff line Loading @@ -5,9 +5,7 @@ void init(int index) { double start_tot; start_tot = CPU_TIME_wt; double begin = CPU_TIME_pr; double begin = CPU_TIME_wt; // DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization dx = 1.0/(double)param.grid_size_x; Loading
main.c +9 −8 Original line number Diff line number Diff line Loading @@ -44,6 +44,8 @@ int main(int argc, char * argv[]) /* Initializing MPI Environment */ double time_tot = CPU_TIME_wt; #ifdef _OPENMP { int thread_level; Loading Loading @@ -104,6 +106,7 @@ int main(int argc, char * argv[]) } for(int ifiles=0; ifiles<param.ndatasets; ifiles++) { if(rank == 0) Loading @@ -124,16 +127,14 @@ int main(int argc, char * argv[]) /* WRITE_FFTW_DATA function */ write_fftw_data(); /* WRITE_RESULT function */ write_result(); if(rank == 0) printf("*************************************************************\n"); } /* WRITE_RESULT function */ timing_wt.total = CPU_TIME_wt - time_tot; write_result(); shutdown(0, NULL, 0, 0); Loading
numa.c +11 −1 Original line number Diff line number Diff line Loading @@ -24,6 +24,16 @@ void *win_hostmaster_ptr; win_t *win_ctrl; MPI_Request *requests; int thid; int Ntasks_local; blocks_t blocks; double **swins; int **cwins; int max_level; double *end_4, *end_reduce; int dsize_4, iter; int numa_build_mapping( int, int, MPI_Comm *, map_t *); int numa_map_hostnames( MPI_Comm *, int, int, map_t *); int get_cpu_id( void ); Loading