CUDA bug fixing (011d30c6) · Commits · Claudio Gheller / HPC_Imaging

Makefile

+33 −9

Original line number	Diff line number	Diff line
		@@ -37,7 +37,7 @@ FFTWLIBS =
		OPT += -DUSE_FFTW

		# use omp-ized version of fftw routines
		OPT += -DHYBRID_FFTW
		#OPT += -DHYBRID_FFTW

		# write the full 3D cube of gridded visibilities and its FFT transform
		#OPT += -DWRITE_DATA
		@@ -73,10 +73,10 @@ OPT += -DPHASE_ON
		#OPT += -DNVIDIA

		#use cuda for GPUs
		#OPT += -DCUDACC
		OPT += -DCUDACC

		# use GPU acceleration via OMP
		OPT += -DACCOMP
		#OPT += -DACCOMP

		# use NVIDIA GPU to perform the reduce
		#OPT += -DNCCL_REDUCE
		@@ -85,7 +85,7 @@ OPT += -DACCOMP
		#OPT += -DRCCL_REDUCE

		# use GPU to perform FFT
		#OPT += -DCUFFTMP
		OPT += -DCUFFTMP

		#support for AMD GPUs
		#OPT += __HIP_PLATFORM_AMD__
		@@ -141,8 +141,13 @@ OBJ_RCCL_REDUCE = gridding_rccl.o

		# ----- define what files will be compiled by NVCC for Nvidia cufftMP implementation of FFT
		#
		DEPS_ACC_CUFFTMP = w-stacking_omp.h cuda_fft.cpp
		ifeq (CUDACC,$(findstring CUDACC,$(OPT)))
		DEPS_ACC_CUFFTMP = cuda_fft.cu
		OBJ_ACC_CUFFTMP = cuda_fft.o
		else
		DEPS_ACC_CUFFTMP = cuda_fft.cpp
		OBJ_ACC_CUFFTMP = cuda_fft.o
		endif


		# -----------------------------------------------------
		@@ -173,13 +178,19 @@ w-stacking.c: w-stacking.cu

		phase_correction.c: phase_correction.cu
		cp phase_correction.cu phase_correction.c

		cuda_fft.cpp: cuda_fft.cu
		cp cuda_fft.cu cuda_fft.cpp
		else
		w-stacking.c: w-stacking.cu
		rm -f w-stacking.cun
		rm -f w-stacking.c
		touch w-stacking.c
		phase_correction.c: phase_correction.cu
		rm -f phase_correction.c
		touch phase_correction.c
		cuda_fft.cpp: cuda_fft.cu
		rm -f cuda_fft.cpp
		touch cuda_fft.cpp
		endif


		@@ -250,15 +261,28 @@ OBJ += $(OBJ_RCCL_REDUCE)
		endif

		ifeq (CUFFTMP,$(findstring CUFFTMP,$(OPT)))

		ifeq (CUDACC,$(findstring CUDACC,$(OPT)))
		EXEC_EXT := $(EXEC_EXT)_acc-fft
		LINKER=$(MPIC++)
		FLAGS=$(OPTIMIZE)
		LIBS=$(NVLIB_2)
		$(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP)
		$(NVCC) $(OPT_NVCC) $(OPT) -c $^ $(LIBS)
		OBJ += $(OBJ_ACC_CUFFTMP)

		else

		EXEC_EXT := $(EXEC_EXT)_acc-fft
		LINKER=$(NVC++)
		FLAGS=$(NVFLAGS) $(CFLAGS)
		LIBS=$(NVLIB) $(NVLIB_2)
		LIBS=$(NVLIB_2)
		$(OBJ_ACC_CUFFTMP): $(DEPS_ACC_CUFFTMP)
		$(NVC++) $(FLAGS) $(OPT) -c $^ $(LIBS)
		OBJ += $(OBJ_ACC_CUFFTMP)
		endif

		endif

		###################################################################################

		@@ -270,11 +294,11 @@ w-stacking: $(OBJ) $(DEPS) Makefile
		%.o: %.c $(DEPS)
		$(MPICC) $(OPTIMIZE) $(OPT) -c -o $@ $< $(CFLAGS)


		clean:
		rm -f *.o
		rm -f w-stacking.c
		rm -f phase_correction.c
		rm -f cuda_fft.cpp

		cleanall:
		rm -f $(EXEC)$(EXT)

allvars.h

+0 −3

Original line number	Diff line number	Diff line
		@@ -34,9 +34,6 @@
		#include "w-stacking.h"
		#endif

		#if defined(CUDACC)
		#include <cuda.h>
		#endif

		#if defined(NVIDIA)
		#include <cuda_runtime.h>

allvars_nccl.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -13,7 +13,7 @@
		#include <unistd.h>


		#if !defined( NCCL_REDUCE )
		#if !defined( NCCL_REDUCE ) && !defined(__CUDACC__)
		#include <stdatomic.h>
		#endif

main.c

+0 −20

Original line number	Diff line number	Diff line
		@@ -3,7 +3,6 @@
		#include "allvars.h"
		#include "proto.h"


		void shutdown_wstacking( int errcode, char *message, char *fname, int linenum )
		{
		if ( ( rank == 0 ) &&
		@@ -77,25 +76,6 @@ int main(int argc, char * argv[])

		FFT_INIT;

		#if defined(CUDACC)
		int ndevices;
		cudaGetDeviceCount(&ndevices);
		cudaSetDevice(rank % ndevices);

		if ( rank == 0 ) {
		if (0 == ndevices) {

		shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ );
		}


		printf("Running rank %d/%d using GPU %d\n", rank, size, rank % ndevices);
		#ifdef NVIDIA
		prtAccelInfo();
		#endif
		}
		#endif

		#ifdef ACCOMP
		if ( rank == 0 ) {
		if (0 == omp_get_num_devices()) {

phase_correction.cu

+18 −1

Original line number	Diff line number	Diff line
		@@ -11,6 +11,8 @@
		#include <math.h>
		#include <stdlib.h>
		#include <stdio.h>
		#include "errcodes.h"
		#include "proto.h"

		#ifdef __CUDACC__

		@@ -107,6 +109,21 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
		long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1;
		if(NWORKERS == 1) {Nbl = 1; Nth = 1;};

		int ndevices;
		cudaGetDeviceCount(&ndevices);
		cudaSetDevice(rank % ndevices);

		if ( rank == 0 ) {
		if (0 == ndevices) {

		shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ );
		}

		}
		printf("Running rank %d using GPU %d\n", rank, rank % ndevices);
		#ifdef NVIDIA
		prtAccelInfo();
		#endif

		cudaError_t mmm;
		double * image_real_g;