Loading Build/Makefile.M100 +4 −1 Original line number Diff line number Diff line Loading @@ -8,6 +8,9 @@ MPIC++ = mpiCC FFTW_INCL= -I/cineca/prod/opt/libraries/fftw/3.3.8/spectrum_mpi--10.4.0--binary/include/ FFTW_LIB= /cineca/prod/opt/libraries/fftw/3.3.8/spectrum_mpi--10.4.0--binary/lib FITSIO_INCL= -I/m100/home/userexternal/cgheller/cfitsio-3.49 FITSIO_LIB= /m100/home/userexternal/cgheller/cfitsio-3.49 NVCC = nvcc NVFLAGS = -arch=sm_70 -Xcompiler -mno-float128 -std=c++11 Loading @@ -15,6 +18,6 @@ NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda OMP= #-fopenmp CFLAGS += -I. $(FFTW_INCL) $(GSL_INCL) $(MPI_INCL) CFLAGS += -I. $(FFTW_INCL) $(GSL_INCL) $(MPI_INCL) $(FITSIO_INCL) OPTIMIZE = $(OMP) -O3 -mtune=native Makefile +6 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,12 @@ OPT += -DWRITE_DATA OPT += -DWRITE_IMAGE # perform w-stacking phase correction OPT += -DPHASE_ON # Support CFITSIO OPT += -DFITSIO ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio endif DEPS = w-stacking.h w-stacking-fftw.c w-stacking.cu phase_correction.cu Loading inverse-imaging.c +104 −71 Original line number Diff line number Diff line Loading @@ -7,6 +7,9 @@ #include <fftw3-mpi.h> #endif #endif #ifdef FITSIO #include "fitsio.h" #endif #include <omp.h> #include <math.h> #include <time.h> Loading Loading @@ -80,7 +83,11 @@ int main(int argc, char * argv[]) // Image related files char imagepath[900] = "./"; #ifdef FITSIO char imagename[FILENAMELENGTH] = "image_name.fits"; #else char imagename[FILENAMELENGTH] = "image_name.bin"; #endif // Visibilities related variables double * uu; Loading Loading @@ -208,10 +215,7 @@ if(rank == 0){ // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]); // Read metadata Loading Loading @@ -329,18 +333,66 @@ if(rank == 0){ // define image variable double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); #ifdef WRITE_DATA double* grid_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* grid_img = (double*) calloc(xaxis*yaxis,sizeof(double)); #endif // reading image strcpy(filename,imagepath); strcat(filename,imagename); printf("Reading Image %s\n",filename); #ifdef FITSIO fitsfile *fptr; int status = 0; int nkeys; fits_open_file(&fptr, filename, READONLY, &status); // header stuff int nocomments; char *header; fits_hdr2str(fptr,0,NULL,0, &header,&nkeys,&status); printf("Number of keywords in the Header = %d\n",nkeys); int header_aux = (int)(80*nkeys/2880); int header_size = (header_aux+1)*2880; printf("Header size (bytes) = %d\n\n",header_size); // get image size int naxis; long * naxes; fits_get_img_dim(fptr, &naxis, &status); naxes = (long *)malloc(sizeof(long)*naxis); fits_get_img_size(fptr, naxis, naxes, &status); if (naxes[0] != xaxis){ printf("Wrong Image Size : %d vs %d\n",naxes[0],xaxis); exit(2); } long * fpixel = (long *) malloc(sizeof(long)*naxis); long * lpixel = (long *) malloc(sizeof(long)*naxis); long * inc = (long *) malloc(sizeof(long)*naxis); int anynul; fpixel[0] = 1; lpixel[0] = xaxis; inc[0] = 1; fpixel[1] = 1; lpixel[1] = rank*yaxis+1; inc[1] = 1; fits_read_subset(fptr, TFLOAT, fpixel, lpixel, inc, NULL, image_real, &anynul, &status); fits_close_file(fptr, &status); #else // all MPI tasks read together: parallel filesystem required pFilereal = fopen (filename,"rb"); long global_index = rank*(xaxis*yaxis)*sizeof(double); fseek(pFilereal, global_index, SEEK_SET); fread(image_real, xaxis*yaxis, sizeof(double), pFilereal); fclose(pFilereal); #endif // image read // We read binary images, however we can easily extend to fits files Loading Loading @@ -392,12 +444,18 @@ if(rank == 0){ fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); grid[fftwindex] = fftwgrid[fftwindex2D][0]; grid[fftwindex+1] = fftwgrid[fftwindex2D][1]; #ifdef WRITE_DATA grid_real[fftwindex2D] = fftwgrid[fftwindex2D][0]; grid_img[fftwindex2D] = fftwgrid[fftwindex2D][1]; #endif } } } fftw_free(fftwgrid); fftw_free(fftwimage); /* // This may be moved inside the WRITE_DATA section: TO BE CHECKED // Create sector grid double * gridss; Loading @@ -413,6 +471,7 @@ if(rank == 0){ #ifndef USE_MPI double * gridtot = (double*) calloc(2*grid_size_x*grid_size_y*num_w_planes,sizeof(double)); #endif */ // Open the MPI Memory Window for the slab #ifdef USE_MPI Loading @@ -428,71 +487,39 @@ if(rank == 0){ printf("WRITING GRIDDED DATA\n"); pFilereal = fopen (outfile2,"wb"); pFileimg = fopen (outfile3,"wb"); #ifdef USE_MPI for (int isector=0; isector<nsectors; isector++) { MPI_Win_lock(MPI_LOCK_SHARED,isector,0,slabwin); MPI_Get(gridss,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,slabwin); MPI_Win_unlock(isector,slabwin); for (long i=0; i<size_of_grid/2; i++) { gridss_real[i] = gridss[2*i]; gridss_img[i] = gridss[2*i+1]; fclose(pFilereal); fclose(pFileimg); } if (num_w_planes > 1) { for (int iw=0; iw<num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], 1, sizeof(double), pFilereal); } for (int iw=0; iw<num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], 1, sizeof(double), pFileimg); //double v_norm = sqrt(gridss[index]*gridss[index]+gridss[index+1]*gridss[index+1]); //fprintf (pFile, "%d %d %d %f %f %f\n", iu,isector*yaxis+iv,iw,gridss[index],gridss[index+1],v_norm); } printf("%d writing\n",isector); pFilereal = fopen (outfile2,"ab"); pFileimg = fopen (outfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); } else { for (int iw=0; iw<num_w_planes; iw++) { long global_index = (xaxis*isector*yaxis + iw*grid_size_x*grid_size_y)*sizeof(double); long index = iw*xaxis*yaxis; fseek(pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], xaxis*yaxis, sizeof(double), pFilereal); fwrite(grid_real, xaxis*yaxis, sizeof(double), pFilereal); fseek(pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], xaxis*yaxis, sizeof(double), pFileimg); } } } #else for (int iw=0; iw<num_w_planes; iw++) for (int iv=0; iv<grid_size_y; iv++) for (int iu=0; iu<grid_size_x; iu++) { long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); fwrite(&gridtot[index], 1, sizeof(double), pFilereal); fwrite(&gridtot[index+1], 1, sizeof(double), pFileimg); //double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); //fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); } #endif fwrite(grid_img, xaxis*yaxis, sizeof(double), pFileimg); fclose(pFilereal); fclose(pFileimg); } } #ifdef USE_MPI MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_DATA if(rank == 0)printf("CREATING LINKED LISTS\n"); Loading Loading @@ -654,6 +681,10 @@ if(rank == 0){ printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); #endif #ifdef STOPHERE // Make convolution on the grid #ifdef VERBOSE printf("Processing sector %ld\n",isector); Loading @@ -679,14 +710,15 @@ if(rank == 0){ gridss, num_threads); /* int z =0 ; /* int z =0 ; #pragma omp target map(to:test_i_gpu) map(from:z) { int x; // only accessible from accelerator x = 2; z = x + test_i_gpu; }*/ } */ clock_gettime(CLOCK_MONOTONIC, &finishk); endk = clock(); Loading Loading @@ -728,6 +760,7 @@ if(rank == 0){ reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; // Go to next sector for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0; #endif //STOPEHERE // Deallocate all sector arrays free(uus); Loading Loading
Build/Makefile.M100 +4 −1 Original line number Diff line number Diff line Loading @@ -8,6 +8,9 @@ MPIC++ = mpiCC FFTW_INCL= -I/cineca/prod/opt/libraries/fftw/3.3.8/spectrum_mpi--10.4.0--binary/include/ FFTW_LIB= /cineca/prod/opt/libraries/fftw/3.3.8/spectrum_mpi--10.4.0--binary/lib FITSIO_INCL= -I/m100/home/userexternal/cgheller/cfitsio-3.49 FITSIO_LIB= /m100/home/userexternal/cgheller/cfitsio-3.49 NVCC = nvcc NVFLAGS = -arch=sm_70 -Xcompiler -mno-float128 -std=c++11 Loading @@ -15,6 +18,6 @@ NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda OMP= #-fopenmp CFLAGS += -I. $(FFTW_INCL) $(GSL_INCL) $(MPI_INCL) CFLAGS += -I. $(FFTW_INCL) $(GSL_INCL) $(MPI_INCL) $(FITSIO_INCL) OPTIMIZE = $(OMP) -O3 -mtune=native
Makefile +6 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,12 @@ OPT += -DWRITE_DATA OPT += -DWRITE_IMAGE # perform w-stacking phase correction OPT += -DPHASE_ON # Support CFITSIO OPT += -DFITSIO ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio endif DEPS = w-stacking.h w-stacking-fftw.c w-stacking.cu phase_correction.cu Loading
inverse-imaging.c +104 −71 Original line number Diff line number Diff line Loading @@ -7,6 +7,9 @@ #include <fftw3-mpi.h> #endif #endif #ifdef FITSIO #include "fitsio.h" #endif #include <omp.h> #include <math.h> #include <time.h> Loading Loading @@ -80,7 +83,11 @@ int main(int argc, char * argv[]) // Image related files char imagepath[900] = "./"; #ifdef FITSIO char imagename[FILENAMELENGTH] = "image_name.fits"; #else char imagename[FILENAMELENGTH] = "image_name.bin"; #endif // Visibilities related variables double * uu; Loading Loading @@ -208,10 +215,7 @@ if(rank == 0){ // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]); // Read metadata Loading Loading @@ -329,18 +333,66 @@ if(rank == 0){ // define image variable double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); #ifdef WRITE_DATA double* grid_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* grid_img = (double*) calloc(xaxis*yaxis,sizeof(double)); #endif // reading image strcpy(filename,imagepath); strcat(filename,imagename); printf("Reading Image %s\n",filename); #ifdef FITSIO fitsfile *fptr; int status = 0; int nkeys; fits_open_file(&fptr, filename, READONLY, &status); // header stuff int nocomments; char *header; fits_hdr2str(fptr,0,NULL,0, &header,&nkeys,&status); printf("Number of keywords in the Header = %d\n",nkeys); int header_aux = (int)(80*nkeys/2880); int header_size = (header_aux+1)*2880; printf("Header size (bytes) = %d\n\n",header_size); // get image size int naxis; long * naxes; fits_get_img_dim(fptr, &naxis, &status); naxes = (long *)malloc(sizeof(long)*naxis); fits_get_img_size(fptr, naxis, naxes, &status); if (naxes[0] != xaxis){ printf("Wrong Image Size : %d vs %d\n",naxes[0],xaxis); exit(2); } long * fpixel = (long *) malloc(sizeof(long)*naxis); long * lpixel = (long *) malloc(sizeof(long)*naxis); long * inc = (long *) malloc(sizeof(long)*naxis); int anynul; fpixel[0] = 1; lpixel[0] = xaxis; inc[0] = 1; fpixel[1] = 1; lpixel[1] = rank*yaxis+1; inc[1] = 1; fits_read_subset(fptr, TFLOAT, fpixel, lpixel, inc, NULL, image_real, &anynul, &status); fits_close_file(fptr, &status); #else // all MPI tasks read together: parallel filesystem required pFilereal = fopen (filename,"rb"); long global_index = rank*(xaxis*yaxis)*sizeof(double); fseek(pFilereal, global_index, SEEK_SET); fread(image_real, xaxis*yaxis, sizeof(double), pFilereal); fclose(pFilereal); #endif // image read // We read binary images, however we can easily extend to fits files Loading Loading @@ -392,12 +444,18 @@ if(rank == 0){ fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); grid[fftwindex] = fftwgrid[fftwindex2D][0]; grid[fftwindex+1] = fftwgrid[fftwindex2D][1]; #ifdef WRITE_DATA grid_real[fftwindex2D] = fftwgrid[fftwindex2D][0]; grid_img[fftwindex2D] = fftwgrid[fftwindex2D][1]; #endif } } } fftw_free(fftwgrid); fftw_free(fftwimage); /* // This may be moved inside the WRITE_DATA section: TO BE CHECKED // Create sector grid double * gridss; Loading @@ -413,6 +471,7 @@ if(rank == 0){ #ifndef USE_MPI double * gridtot = (double*) calloc(2*grid_size_x*grid_size_y*num_w_planes,sizeof(double)); #endif */ // Open the MPI Memory Window for the slab #ifdef USE_MPI Loading @@ -428,71 +487,39 @@ if(rank == 0){ printf("WRITING GRIDDED DATA\n"); pFilereal = fopen (outfile2,"wb"); pFileimg = fopen (outfile3,"wb"); #ifdef USE_MPI for (int isector=0; isector<nsectors; isector++) { MPI_Win_lock(MPI_LOCK_SHARED,isector,0,slabwin); MPI_Get(gridss,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,slabwin); MPI_Win_unlock(isector,slabwin); for (long i=0; i<size_of_grid/2; i++) { gridss_real[i] = gridss[2*i]; gridss_img[i] = gridss[2*i+1]; fclose(pFilereal); fclose(pFileimg); } if (num_w_planes > 1) { for (int iw=0; iw<num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], 1, sizeof(double), pFilereal); } for (int iw=0; iw<num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*grid_size_x*grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], 1, sizeof(double), pFileimg); //double v_norm = sqrt(gridss[index]*gridss[index]+gridss[index+1]*gridss[index+1]); //fprintf (pFile, "%d %d %d %f %f %f\n", iu,isector*yaxis+iv,iw,gridss[index],gridss[index+1],v_norm); } printf("%d writing\n",isector); pFilereal = fopen (outfile2,"ab"); pFileimg = fopen (outfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); } else { for (int iw=0; iw<num_w_planes; iw++) { long global_index = (xaxis*isector*yaxis + iw*grid_size_x*grid_size_y)*sizeof(double); long index = iw*xaxis*yaxis; fseek(pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], xaxis*yaxis, sizeof(double), pFilereal); fwrite(grid_real, xaxis*yaxis, sizeof(double), pFilereal); fseek(pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], xaxis*yaxis, sizeof(double), pFileimg); } } } #else for (int iw=0; iw<num_w_planes; iw++) for (int iv=0; iv<grid_size_y; iv++) for (int iu=0; iu<grid_size_x; iu++) { long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); fwrite(&gridtot[index], 1, sizeof(double), pFilereal); fwrite(&gridtot[index+1], 1, sizeof(double), pFileimg); //double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); //fprintf (pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); } #endif fwrite(grid_img, xaxis*yaxis, sizeof(double), pFileimg); fclose(pFilereal); fclose(pFileimg); } } #ifdef USE_MPI MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_DATA if(rank == 0)printf("CREATING LINKED LISTS\n"); Loading Loading @@ -654,6 +681,10 @@ if(rank == 0){ printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax); #endif #ifdef STOPHERE // Make convolution on the grid #ifdef VERBOSE printf("Processing sector %ld\n",isector); Loading @@ -679,14 +710,15 @@ if(rank == 0){ gridss, num_threads); /* int z =0 ; /* int z =0 ; #pragma omp target map(to:test_i_gpu) map(from:z) { int x; // only accessible from accelerator x = 2; z = x + test_i_gpu; }*/ } */ clock_gettime(CLOCK_MONOTONIC, &finishk); endk = clock(); Loading Loading @@ -728,6 +760,7 @@ if(rank == 0){ reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; // Go to next sector for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0; #endif //STOPEHERE // Deallocate all sector arrays free(uus); Loading