Commit 473bb207 authored by Giovanni Lacopo
Browse files

Switch on/off the USE_OMP define for using OpenMP with CPUs

parent f1efc03a
Loading
Loading
Loading
Loading
+5 −3
Original line number Original line Diff line number Diff line
@@ -5,10 +5,12 @@ MPICC = mpicc
MPIC++   =  mpic++
MPIC++   =  mpic++


OPTIMIZE = -O4 -fopenmp -march=native -mavx -mavx2 
OPTIMIZE = -O4 -fopenmp -march=native -mavx -mavx2 
OPT_PURE_MPI = -O4 -march=native -mavx -mavx2

OMP_GPU = -mp=multicore,gpu -gpu=cuda11.8 -gpu=cc80
OMP_GPU = -mp=multicore,gpu -gpu=cuda11.8 -gpu=cc80


CUDA_INC = -I/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/include
CUDA_INC = -I/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/include
CUDA_LIB = -L/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/lib64
CUDA_LIB = -L/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/lib64 -L/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/cuda/11.8/targets/x86_64-linux/lib/stubs


FFTW_INCL=
FFTW_INCL=
FFTW_LIB=
FFTW_LIB=
@@ -34,10 +36,10 @@ NCCL_LIB = -L/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/
NVC = nvc 
NVC = nvc 
NVC++ = nvc++
NVC++ = nvc++
NVFLAGS = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2  
NVFLAGS = -O4 -fast -march=native $(OMP_GPU) -mavx -mavx2  
NVLIB = $(CUDA_INC) $(CUDA_LIB) -lcudart
NVLIB = $(CUDA_INC) $(CUDA_LIB) -lcuda -lcudart




NVLIB_2 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(CUFFTMP_INC) $(CUFFTMP_LIB) $(NVSHMEM_INC) $(NVSHMEM_LIB) -lnvshmem_host -lnvshmem_device -lcudart -lcufftMp 
NVLIB_2 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(CUFFTMP_INC) $(CUFFTMP_LIB) $(NVSHMEM_INC) $(NVSHMEM_LIB) -lcuda -lnvidia-ml -lnvshmem_host -lnvshmem_device -lcudart -lcufftMp 


NVLIB_3 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(NCCL_INC) $(NCCL_LIB) -lcudart -lnccl
NVLIB_3 = $(CUDA_INC) $(CUDA_LIB) $(MPI_INC) $(MPI_LIB) $(NCCL_INC) $(NCCL_LIB) -lcudart -lnccl


+23 −6
Original line number Original line Diff line number Diff line
@@ -25,8 +25,6 @@ FFTW_MPI_LIB =


CFLAGS += -I./
CFLAGS += -I./


FLAGS=$(OPTIMIZE)

FFTWLIBS =
FFTWLIBS =


# ========================================================
# ========================================================
@@ -37,7 +35,10 @@ FFTWLIBS =
OPT += -DUSE_FFTW
OPT += -DUSE_FFTW


# use omp-ized version of fftw routines
# use omp-ized version of fftw routines
#OPT += -DHYBRID_FFTW
OPT += -DHYBRID_FFTW

# switch on the OpenMP parallelization on CPUs.
# The -D prefix is required: $(OPT) is passed verbatim on the compiler command
# line, so a bare USE_OMP word would be taken as an input file name instead of
# defining the preprocessor macro. Comment this line out for a pure-MPI build.
OPT += -DUSE_OMP


# write the full 3D cube of gridded visibilities and its FFT transform
# write the full 3D cube of gridded visibilities and its FFT transform
#OPT += -DWRITE_DATA
#OPT += -DWRITE_DATA
@@ -60,7 +61,7 @@ OPT += -DPHASE_ON
# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
# Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL
#OPT += -DGAUSS_HI_PRECISION
#OPT += -DGAUSS_HI_PRECISION


#OPT += -DGAUSS
OPT += -DGAUSS


#OPT += -DKAISERBESSEL
#OPT += -DKAISERBESSEL


@@ -73,7 +74,7 @@ OPT += -DPHASE_ON
#OPT += -DNVIDIA
#OPT += -DNVIDIA


#use cuda for GPUs
#use cuda for GPUs
OPT += -DCUDACC
#OPT += -DCUDACC


# use GPU acceleration via OMP 
# use GPU acceleration via OMP 
#OPT += -DACCOMP
#OPT += -DACCOMP
@@ -85,7 +86,7 @@ OPT += -DCUDACC
#OPT += -DRCCL_REDUCE
#OPT += -DRCCL_REDUCE


# use GPU to perform FFT
# use GPU to perform FFT
OPT += -DCUFFTMP
#OPT += -DCUFFTMP


#support for AMD GPUs
#support for AMD GPUs
#OPT += __HIP_PLATFORM_AMD__
#OPT += __HIP_PLATFORM_AMD__
@@ -95,6 +96,13 @@ OPT += -DCUFFTMP


# ========================================================
# ========================================================



# Select the base compiler flags depending on whether OpenMP is requested.
# $(findstring ...) is a substring match, so FLAGS becomes $(OPTIMIZE) as soon
# as the text USE_OMP occurs anywhere in $(OPT); otherwise $(OPT_PURE_MPI) is used.
# NOTE(review): OPTIMIZE and OPT_PURE_MPI are not defined in this section;
# presumably they come from the included system-specific makefile - confirm.
ifeq (USE_OMP,$(findstring USE_OMP,$(OPT)))
FLAGS=$(OPTIMIZE)
else
FLAGS=$(OPT_PURE_MPI)
endif

ifeq (FITSIO,$(findstring FITSIO,$(OPT)))
ifeq (FITSIO,$(findstring FITSIO,$(OPT)))
        LIBS += -L$(FITSIO_LIB) -lcfitsio
        LIBS += -L$(FITSIO_LIB) -lcfitsio
endif	
endif	
@@ -166,7 +174,11 @@ endif


# FFTW support: add the FFTW include path and pick the libraries to link.
# When HYBRID_FFTW is present in $(OPT) the OpenMP-threaded FFTW library
# (-lfftw3_omp) is linked in addition to the MPI and serial ones; otherwise
# only the MPI and serial libraries are used.
# The literal compared against must be spelled exactly like the string searched
# for by $(findstring ...), otherwise the hybrid branch can never be taken.
ifeq (USE_FFTW,$(findstring USE_FFTW,$(OPT)))
CFLAGS += $(FFTW_MPI_INC)
ifeq (HYBRID_FFTW,$(findstring HYBRID_FFTW,$(OPT)))
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_omp -lfftw3_mpi -lfftw3 -lm
else
FFTWLIBS = $(FFTW_MPI_LIB) -lfftw3_mpi -lfftw3 -lm
endif
endif


# define rules for sources that contains GPU code
# define rules for sources that contains GPU code
@@ -291,8 +303,13 @@ w-stacking: $(OBJ) $(DEPS) Makefile


#$(OBJ): $(DEPS) Makefile
#$(OBJ): $(DEPS) Makefile


# Pattern rule compiling every C source into an object file with $(MPICC).
# The optimisation flags are $(OPTIMIZE) when USE_OMP occurs in $(OPT) and
# $(OPT_PURE_MPI) otherwise; $(OPT) supplies the -D feature switches and
# $(CFLAGS) the include paths. Every object also depends on $(DEPS).
ifeq (USE_OMP,$(findstring USE_OMP,$(OPT)))
%.o: %.c $(DEPS)
%.o: %.c $(DEPS)
	$(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS)
	$(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS)
else
%.o: %.c $(DEPS)
	$(MPICC) $(OPT_PURE_MPI) $(OPT) -c -o $@ $< $(CFLAGS)
endif


clean:
clean:
	rm -f *.o
	rm -f *.o