# ==========================================================================
# Build/Makefile.leo -- changed regions only (+5 -3).
# Text between regions is not shown in this view.
# ==========================================================================

# ---- region @@ -5,10 +5,12 @@ (preceding context: MPICC = mpicc) ----
MPIC++ = mpic++

# OPTIMIZE carries -fopenmp; OPT_PURE_MPI is the same flag set without it.
OPTIMIZE = -O4 -fopenmp -march=native -mavx -mavx2
OPT_PURE_MPI = -O4 -march=native -mavx -mavx2

OMP_GPU = -mp=multicore,gpu -gpu=cuda11.8 -gpu=cc80

CUDA_INC = -I/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/include
# Second -L adds the "stubs" directory of the same CUDA tree to the search path.
CUDA_LIB = -L/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/lib64 -L/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/targets/x86_64-linux/lib/stubs

FFTW_INCL=
FFTW_LIB=

# ---- region @@ -34,10 +36,10 @@ (preceding context: NCCL_LIB = ...) ----
NVC = nvc
NVC++ = nvc++
NVFLAGS = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2
NVLIB = $(CUDA_INC) $(CUDA_LIB) -lcuda -lcudart
NVLIB_2 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(CUFFTMP_INC) $(CUFFTMP_LIB) $(NVSHMEM_INC) $(NVSHMEM_LIB) -lcuda -lnvidia-ml -lnvshmem_host -lnvshmem_device -lcudart -lcufftMp
NVLIB_3 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(NCCL_INC) $(NCCL_LIB) -lcudart -lnccl

# ==========================================================================
# Makefile -- changed regions only (+23 -6).
# Text between regions is not shown in this view.
# ==========================================================================

# ---- region @@ -25,8 +25,6 @@ (preceding context: FFTW_MPI_LIB =) ----
CFLAGS += -I./

# FLAGS is selected further down, after OPT has been fully assembled.
FFTWLIBS =

# ========================================================

# ---- region @@ -37,7 +35,10 @@ (preceding context: FFTWLIBS =) ----
OPT += -DUSE_FFTW

# use omp-ized version of fftw routines
OPT += -DHYBRID_FFTW

# switch on the OpenMP parallelization
# Written as a -D define because $(OPT) is placed verbatim on the compile
# line below; the $(findstring USE_OMP,...) tests still match it.
OPT += -DUSE_OMP

# write the full 3D cube of gridded visibilities and its FFT transform
#OPT += -DWRITE_DATA

# ---- region @@ -60,7 +61,7 @@ (preceding context: OPT += -DPHASE_ON) ----
# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
#OPT += -DGAUSS_HI_PRECISION

OPT += -DGAUSS

#OPT += -DKAISERBESSEL

# ---- region @@ -73,7 +74,7 @@ (preceding context: OPT += -DPHASE_ON) ----
#OPT += -DNVIDIA

# use cuda for GPUs
#OPT += -DCUDACC

# use GPU acceleration via OMP
#OPT += -DACCOMP

# ---- region @@ -85,7 +86,7 @@ (preceding context: OPT += -DCUDACC) ----
#OPT += -DRCCL_REDUCE

# use GPU to perform FFT
#OPT += -DCUFFTMP

# support for AMD GPUs
#OPT += -D__HIP_PLATFORM_AMD__

# ---- region @@ -95,6 +96,13 @@ (preceding context: OPT += -DCUFFTMP) ----
# ========================================================

# Pick the optimisation flag set: with OpenMP when USE_OMP is present in OPT,
# otherwise the pure-MPI set.
ifeq (USE_OMP,$(findstring USE_OMP,$(OPT)))
FLAGS=$(OPTIMIZE)
else
FLAGS=$(OPT_PURE_MPI)
endif

ifeq (FITSIO,$(findstring FITSIO,$(OPT)))
LIBS += -L$(FITSIO_LIB) -lcfitsio
endif

# ---- region @@ -166,7 +174,11 @@ (preceding context: endif) ----
ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT)))
CFLAGS += $(FFTW_MPI_INC)
# Link the OpenMP-enabled FFTW library only when HYBRID_FFTW is requested.
# The left-hand literal must be spelled exactly like the searched string,
# otherwise this branch can never be taken.
ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm
else
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm
endif
endif

# define rules for sources that contains GPU code

# ---- region @@ -291,8 +303,13 @@ (preceding context: w-stacking: $(OBJ) $(DEPS) Makefile) ----
#$(OBJ): $(DEPS) Makefile

# Compile C sources with the flag set that matches the USE_OMP choice.
ifeq (USE_OMP,$(findstring USE_OMP,$(OPT)))
%.o: %.c $(DEPS)
	$(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS)
else
%.o: %.c $(DEPS)
	$(MPICC) $(OPT_PURE_MPI) $(OPT) -c -o $@ $< $(CFLAGS)
endif

clean:
	rm -f *.o
# Build/Makefile.leo -- changed regions only (+5 -3).
# Text between regions is not shown in this view.

# ---- region @@ -5,10 +5,12 @@ (preceding context: MPICC = mpicc) ----
MPIC++       = mpic++

# Two optimisation sets that differ only by -fopenmp.
OPTIMIZE     = -O4 -fopenmp -march=native -mavx -mavx2
OPT_PURE_MPI = -O4 -march=native -mavx -mavx2

OMP_GPU      = -mp=multicore,gpu -gpu=cuda11.8 -gpu=cc80

# Common prefix of the CUDA include/library directories used below.
NVHPC_CUDA_ROOT = /leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8

CUDA_INC     = -I$(NVHPC_CUDA_ROOT)/include
# Library search path: the lib64 directory first, then the stubs directory.
CUDA_LIB     = -L$(NVHPC_CUDA_ROOT)/lib64 \
               -L$(NVHPC_CUDA_ROOT)/targets/x86_64-linux/lib/stubs

FFTW_INCL    =
FFTW_LIB     =

# ---- region @@ -34,10 +36,10 @@ (preceding context: NCCL_LIB = ...) ----
NVC          = nvc
NVC++        = nvc++
NVFLAGS      = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2

# Link lines; each wraps the CUDA include/library paths plus the libraries named.
NVLIB        = $(CUDA_INC) $(CUDA_LIB) -lcuda -lcudart
NVLIB_2      = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) \
               $(CUFFTMP_INC) $(CUFFTMP_LIB) $(NVSHMEM_INC) $(NVSHMEM_LIB) \
               -lcuda -lnvidia-ml -lnvshmem_host -lnvshmem_device -lcudart -lcufftMp
NVLIB_3      = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) \
               $(NCCL_INC) $(NCCL_LIB) -lcudart -lnccl
# Makefile -- changed regions only (+23 -6).
# Text between regions is not shown in this view.

# ---- region @@ -25,8 +25,6 @@ (preceding context: FFTW_MPI_LIB =) ----
CFLAGS += -I./

# FLAGS is selected further down, after OPT has been fully assembled.
FFTWLIBS =

# ========================================================

# ---- region @@ -37,7 +35,10 @@ (preceding context: FFTWLIBS =) ----
OPT += -DUSE_FFTW

# use omp-ized version of fftw routines
OPT += -DHYBRID_FFTW

# switch on the OpenMP parallelization
# Written as a -D define because $(OPT) is placed verbatim on the compile
# line below; the $(findstring USE_OMP,...) tests still match it.
OPT += -DUSE_OMP

# write the full 3D cube of gridded visibilities and its FFT transform
#OPT += -DWRITE_DATA

# ---- region @@ -60,7 +61,7 @@ (preceding context: OPT += -DPHASE_ON) ----
# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
#OPT += -DGAUSS_HI_PRECISION

OPT += -DGAUSS

#OPT += -DKAISERBESSEL

# ---- region @@ -73,7 +74,7 @@ (preceding context: OPT += -DPHASE_ON) ----
#OPT += -DNVIDIA

# use cuda for GPUs
#OPT += -DCUDACC

# use GPU acceleration via OMP
#OPT += -DACCOMP

# ---- region @@ -85,7 +86,7 @@ (preceding context: OPT += -DCUDACC) ----
#OPT += -DRCCL_REDUCE

# use GPU to perform FFT
#OPT += -DCUFFTMP

# support for AMD GPUs
#OPT += -D__HIP_PLATFORM_AMD__

# ---- region @@ -95,6 +96,13 @@ (preceding context: OPT += -DCUFFTMP) ----
# ========================================================

# Pick the optimisation flag set: with OpenMP when USE_OMP is present in OPT,
# otherwise the pure-MPI set.
ifeq (USE_OMP,$(findstring USE_OMP,$(OPT)))
FLAGS=$(OPTIMIZE)
else
FLAGS=$(OPT_PURE_MPI)
endif

ifeq (FITSIO,$(findstring FITSIO,$(OPT)))
LIBS += -L$(FITSIO_LIB) -lcfitsio
endif

# ---- region @@ -166,7 +174,11 @@ (preceding context: endif) ----
ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT)))
CFLAGS += $(FFTW_MPI_INC)
# Link the OpenMP-enabled FFTW library only when HYBRID_FFTW is requested.
# The left-hand literal must be spelled exactly like the searched string,
# otherwise this branch can never be taken.
ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm
else
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm
endif
endif

# define rules for sources that contains GPU code

# ---- region @@ -291,8 +303,13 @@ (preceding context: w-stacking: $(OBJ) $(DEPS) Makefile) ----
#$(OBJ): $(DEPS) Makefile

# Compile C sources with the flag set that matches the USE_OMP choice.
ifeq (USE_OMP,$(findstring USE_OMP,$(OPT)))
%.o: %.c $(DEPS)
	$(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS)
else
%.o: %.c $(DEPS)
	$(MPICC) $(OPT_PURE_MPI) $(OPT) -c -o $@ $< $(CFLAGS)
endif

clean:
	rm -f *.o