Loading .gitignore 0 → 100644 +7 −0 Original line number Diff line number Diff line *.o phase_correction.c w-stacking.c w-stackingCfftw w-stackingfftw w-stackingCfftw_serial w-stackingfftw_serial Makefile +18 −10 Original line number Diff line number Diff line OPT += -DUSE_MPI OPT += -DUSE_FFTW OPT += -DONE_SIDE OPT += -DWRITE_DATA OPT += -DNOWRITE_DATA OPT += -DWRITE_IMAGE CC = gcc CXX = g++ MPICC = mpicc MPICXX = mpiCC #OMP = -fopenmp OMP = CFLAGS += -O3 -mcpu=native CFLAGS += -I. LIBS = -L$(FFTW_LIB) -lfftw3_mpi -lfftw3 -lm Loading @@ -17,31 +21,35 @@ NVFLAGS = -arch=sm_70 -c w-stacking.cu -Xcompiler -mno-float128 -std=c++11 NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda DEPS = w-stacking.h COBJ = w-stacking.o w-stacking-fftw.o COBJ = w-stacking.o w-stacking-fftw.o phase_correction.o w-stacking.c: cp w-stacking.cu w-stacking.c phase_correction.c: cp phase_correction.cu phase_correction.c %.o: %.c $(DEPS) $(CC) -c -o $@ $< $(CFLAGS) $(OPT) $(CC) $(OMP) -c -o $@ $< $(CFLAGS) $(OPT) serial: $(COBJ) $(CC) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm $(CC) $(OMP) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm serial_cuda: $(NVCC) $(NVFLAGS) -c w-stacking.cu $(NVLIB) $(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c $(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o $(NVLIB) -lm $(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c phase_correction.c $(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm mpi: $(COBJ) $(MPICC) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS) $(MPICC) $(OMP) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS) mpi_cuda: $(NVCC) $(NVFLAGS) -c w-stacking.cu $(NVLIB) $(MPICC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c $(MPICXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o $(NVLIB) $(LIBS) -lm $(MPICC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c phase_correction.c $(MPICXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) $(LIBS) -lm clean: rm *.o rm w-stacking.c rm phase_correction.c phase_correction.cu 0 → 100644 +30 −0 Original line number Diff line number Diff line #ifdef _OPENMP #include <omp.h> #endif #include "w-stacking.h" #include <math.h> #include <stdlib.h> #include <stdio.h> #define NWORKERS -1 //100 #define NTHREADS 32 #ifdef __CUDACC__ #endif void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes) { for (int iw=0; iw<num_w_planes; iw++) { for (int iv=0; iw<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long index = 2*(iu+iv*xaxis+xaxis*yaxis*iw); long img_index = iu+iv*xaxis; image_real[img_index] += gridss[index]; image_imag[img_index] += gridss[index]; } } } w-stacking-fftw.c +50 −2 Original line number Diff line number Diff line Loading @@ -99,7 +99,7 @@ int main(int argc, char * argv[]) int local_grid_size_y;// = 100; int xaxis; int yaxis; int num_w_planes = 1; int num_w_planes = 10; // DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization int w_support = 7; int num_threads;// = 4; Loading Loading @@ -151,7 +151,7 @@ int main(int argc, char * argv[]) start = clock(); // CLAAAAAA int ndatasets = 2; int ndatasets = 1; strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); Loading Loading @@ -821,6 +821,54 @@ int main(int argc, char * argv[]) fftw_free(fftwgrid); // Phase correction if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes); #ifdef WRITE_IMAGE if(rank == 0) { pFilereal = fopen (fftfile2,"wb"); pFileimg = fopen (fftfile3,"wb"); fclose(pFilereal); fclose(pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); pFilereal = fopen (fftfile2,"ab"); pFileimg = fopen (fftfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); fseek(pFilereal, global_index, SEEK_SET); fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal); fseek(pFileimg, global_index, SEEK_SET); fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg); fclose(pFilereal); fclose(pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE #endif //USE_FFTW end = clock(); Loading w-stacking.h +12 −0 Original line number Diff line number Diff line Loading @@ -24,9 +24,21 @@ void wstack( int, double*, int); #ifdef __CUDACC__ extern "C" #endif int test(int nnn); #ifdef __CUDACC__ extern "C" #endif void phase_correction( double*, double*, double*, int, int, int); #endif Loading
.gitignore 0 → 100644 +7 −0 Original line number Diff line number Diff line *.o phase_correction.c w-stacking.c w-stackingCfftw w-stackingfftw w-stackingCfftw_serial w-stackingfftw_serial
Makefile +18 −10 Original line number Diff line number Diff line OPT += -DUSE_MPI OPT += -DUSE_FFTW OPT += -DONE_SIDE OPT += -DWRITE_DATA OPT += -DNOWRITE_DATA OPT += -DWRITE_IMAGE CC = gcc CXX = g++ MPICC = mpicc MPICXX = mpiCC #OMP = -fopenmp OMP = CFLAGS += -O3 -mcpu=native CFLAGS += -I. LIBS = -L$(FFTW_LIB) -lfftw3_mpi -lfftw3 -lm Loading @@ -17,31 +21,35 @@ NVFLAGS = -arch=sm_70 -c w-stacking.cu -Xcompiler -mno-float128 -std=c++11 NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda DEPS = w-stacking.h COBJ = w-stacking.o w-stacking-fftw.o COBJ = w-stacking.o w-stacking-fftw.o phase_correction.o w-stacking.c: cp w-stacking.cu w-stacking.c phase_correction.c: cp phase_correction.cu phase_correction.c %.o: %.c $(DEPS) $(CC) -c -o $@ $< $(CFLAGS) $(OPT) $(CC) $(OMP) -c -o $@ $< $(CFLAGS) $(OPT) serial: $(COBJ) $(CC) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm $(CC) $(OMP) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm serial_cuda: $(NVCC) $(NVFLAGS) -c w-stacking.cu $(NVLIB) $(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c $(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o $(NVLIB) -lm $(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c phase_correction.c $(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm mpi: $(COBJ) $(MPICC) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS) $(MPICC) $(OMP) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS) mpi_cuda: $(NVCC) $(NVFLAGS) -c w-stacking.cu $(NVLIB) $(MPICC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c $(MPICXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o $(NVLIB) $(LIBS) -lm $(MPICC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c phase_correction.c $(MPICXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) $(LIBS) -lm clean: rm *.o rm w-stacking.c rm phase_correction.c
phase_correction.cu 0 → 100644 +30 −0 Original line number Diff line number Diff line #ifdef _OPENMP #include <omp.h> #endif #include "w-stacking.h" #include <math.h> #include <stdlib.h> #include <stdio.h> #define NWORKERS -1 //100 #define NTHREADS 32 #ifdef __CUDACC__ #endif void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes) { for (int iw=0; iw<num_w_planes; iw++) { for (int iv=0; iw<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long index = 2*(iu+iv*xaxis+xaxis*yaxis*iw); long img_index = iu+iv*xaxis; image_real[img_index] += gridss[index]; image_imag[img_index] += gridss[index]; } } }
w-stacking-fftw.c +50 −2 Original line number Diff line number Diff line Loading @@ -99,7 +99,7 @@ int main(int argc, char * argv[]) int local_grid_size_y;// = 100; int xaxis; int yaxis; int num_w_planes = 1; int num_w_planes = 10; // DAV: the corresponding KernelLen is calculated within the wstack function. It can be anyway hardcoded for optimization int w_support = 7; int num_threads;// = 4; Loading Loading @@ -151,7 +151,7 @@ int main(int argc, char * argv[]) start = clock(); // CLAAAAAA int ndatasets = 2; int ndatasets = 1; strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); Loading Loading @@ -821,6 +821,54 @@ int main(int argc, char * argv[]) fftw_free(fftwgrid); // Phase correction if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes); #ifdef WRITE_IMAGE if(rank == 0) { pFilereal = fopen (fftfile2,"wb"); pFileimg = fopen (fftfile3,"wb"); fclose(pFilereal); fclose(pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); pFilereal = fopen (fftfile2,"ab"); pFileimg = fopen (fftfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); fseek(pFilereal, global_index, SEEK_SET); fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal); fseek(pFileimg, global_index, SEEK_SET); fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg); fclose(pFilereal); fclose(pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE #endif //USE_FFTW end = clock(); Loading
w-stacking.h +12 −0 Original line number Diff line number Diff line Loading @@ -24,9 +24,21 @@ void wstack( int, double*, int); #ifdef __CUDACC__ extern "C" #endif int test(int nnn); #ifdef __CUDACC__ extern "C" #endif void phase_correction( double*, double*, double*, int, int, int); #endif