From b53acb4edc630d288dcf868c67fb36a18e8b0a0c Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Tue, 17 Jan 2023 17:35:55 +0100 Subject: [PATCH 01/11] Allows to plot fits files produced as output --- scripts/plotgrid_fits.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 scripts/plotgrid_fits.py diff --git a/scripts/plotgrid_fits.py b/scripts/plotgrid_fits.py new file mode 100644 index 0000000..0b4583e --- /dev/null +++ b/scripts/plotgrid_fits.py @@ -0,0 +1,32 @@ +#!/usr/bin/python3 +import numpy as np +import matplotlib.pyplot as plt +from astropy.io import fits +real_fits = "/home/emanuele/hpc_imaging/test_fits_real.fits" +imag_fits = "/home/emanuele/hpc_imaging/test_fits_img.fits" +nplanes = 1 + + +with fits.open(real_fits) as hdu_real: + img_hdu_real = np.array(hdu_real[0].data) + +with fits.open(imag_fits) as hdu_imag: + img_hdu_imag = np.array(hdu_imag[0].data) + + +xaxis = int(np.sqrt(img_hdu_real.size)) +yaxes = xaxis +residual = np.vectorize(complex)(img_hdu_real, img_hdu_imag) + +cumul2d = residual.reshape((xaxis,yaxes,nplanes), order='F') + +for i in range(nplanes): + gridded = np.squeeze(cumul2d[:,:,i]) + ax = plt.subplot() + img = ax.imshow(np.abs(np.fft.fftshift(gridded)), aspect='auto', interpolation='none', origin='lower') + ax.set_xlabel('cell') + ax.set_ylabel('cell') + cbar = plt.colorbar(img) + cbar.set_label('norm(FFT)',size=18) + figname='fits_image_' + str(i) + '.png' + plt.savefig(figname) -- GitLab From d3efe10f60953fc6cff8dfa48f977affa3f1f5f3 Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Tue, 17 Jan 2023 17:41:54 +0100 Subject: [PATCH 02/11] Makefile for M100 with CFITSIO implementation --- Build/Makefile.M100 | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Build/Makefile.M100 b/Build/Makefile.M100 index 1fa008e..eaf92e2 100644 --- a/Build/Makefile.M100 +++ b/Build/Makefile.M100 @@ -1,3 +1,4 @@ + CC = gcc CXX = g++ @@ -5,8 +6,11 @@ MPICC = mpicc MPIC++ = mpiCC -#FFTW_INCL= -I/home/taffoni/sw/include -#FFTW_LIB= -L/home/taffoni/sw/lib +FFTW_INCL= -I/cineca/prod/opt/libraries/fftw/3.3.8/spectrum_mpi--10.4.0--binary/include/ +FFTW_LIB= /cineca/prod/opt/libraries/fftw/3.3.8/spectrum_mpi--10.4.0--binary/lib + +FITSIO_INCL= -I/m100_work/IscrC_CD-DLS/cfitsio-3.49 +FITSIO_LIB= /m100_work/IscrC_CD-DLS/cfitsio-3.49 NVCC = nvcc @@ -15,6 +19,6 @@ NVLIB = -L/cineca/prod/opt/compilers/cuda/10.1/none/lib64/ -lcudart -lcuda OMP= -fopenmp -CFLAGS += -I. $(FFTW_INCL) $(GSL_INCL) $(MPI_INCL) +CFLAGS += -I. $(FFTW_INCL) $(GSL_INCL) $(MPI_INCL) $(FITSIO_INCL) OPTIMIZE = $(OMP) -O3 -mtune=native -- GitLab From 84d092c1b3cda3e0005f3269b5dd3d0d55c099ba Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Tue, 17 Jan 2023 17:44:25 +0100 Subject: [PATCH 03/11] New Makefile with CFITSIO implementation --- Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 23d161d..c034046 100644 --- a/Makefile +++ b/Makefile @@ -27,14 +27,19 @@ endif #OPT += -DNVIDIA # perform one-side communication (suggested) instead of reduce (only if MPI is active) -OPT += -DONE_SIDE +#OPT += -DONE_SIDE # write the full 3D cube of gridded visibilities and its FFT transform #OPT += -DWRITE_DATA # write the final image OPT += -DWRITE_IMAGE # perform w-stacking phase correction -OPT += -DPHASE_ON +#OPT += -DPHASE_ON +# Support CFITSIO +OPT += -DFITSIO +ifeq (FITSIO,$(findstring FITSIO,$(OPT))) + LIBS += -L$(FITSIO_LIB) -lcfitsio +endif DEPS = w-stacking.h w-stacking-fftw.c w-stacking.cu phase_correction.cu COBJ = w-stacking.o w-stacking-fftw.o phase_correction.o -- GitLab From ef5ea6f46e619a782532291d830e9b3222177685 Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Sun, 22 Jan 2023 18:30:20 +0100 Subject: [PATCH 04/11] CFITSIO serial implementation --- w-stacking-fftw.c | 113 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 90 insertions(+), 23 deletions(-) diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index 5469014..6e00e73 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -1,6 +1,9 @@ #include #include #include +#ifdef FITSIO +#include "/m100/home/userexternal/ederubei/cfitsio-3.49/fitsio.h" +#endif #ifdef USE_MPI #include #ifdef USE_FFTW @@ -44,6 +47,7 @@ int main(int argc, char * argv[]) int rank; int size; + // Define main filenames FILE * pFile; FILE * pFile1; @@ -51,7 +55,7 @@ int main(int argc, char * argv[]) FILE * pFileimg; // Global filename to be composed char filename[1000]; - + // MS paths char datapath[900]; char datapath_multi[NFILES][900]; @@ -118,11 +122,29 @@ int main(int argc, char * argv[]) int num_threads; // Resolution - double dx = 1.0/(double)grid_size_x; - double dw = 1.0/(double)num_w_planes; + double dx = 0.5/(double)grid_size_x; + double dw = 0.5/(double)num_w_planes; // Half support size double w_supporth = (double)((w_support-1)/2)*dx; + + // Initialize FITS image parameters + fitsfile *fptreal; + fitsfile *fptrimg; + int status; + long nelements; + long fpixel, lpixel; + char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; + char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; + + long naxis = 2; + long naxes[2] = { grid_size_x, grid_size_y }; + + nelements = naxes[0] * naxes[1]; + + + + // Internal profiling parameters clock_t start, end, start0, startk, endk; double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time, phase_time; @@ -201,9 +223,9 @@ if(rank == 0){ // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; - //strcpy(datapath_multi[0],"data/newgauss2noconj_t201806301100_SBL180.binMS/"); + strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); - strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); + //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]); @@ -917,8 +939,8 @@ if(rank == 0){ clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); if(rank == 0)printf("PHASE CORRECTION\n"); - double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); - double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); + double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); + double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,num_w_planes,grid_size_x,grid_size_y,resolution,wmin,wmax,num_threads); @@ -931,10 +953,33 @@ if(rank == 0){ if(rank == 0) { - pFilereal = fopen (fftfile2,"wb"); - pFileimg = fopen (fftfile3,"wb"); - fclose(pFilereal); - fclose(pFileimg); + #ifdef FITSIO + printf("REMOVING RESIDUAL FITS FILE\n"); + remove(testfitsreal); + remove(testfitsimag); + + + printf("FITS CREATING\n"); + status = 0; + + fits_create_file(&fptrimg, testfitsimag, &status); + fits_create_img(fptrimg, DOUBLE_IMG, naxis, naxes, &status); +// fits_write_img(fptrimg, TDOUBLE, fpixel, nelements, image_imag, &status); + fits_close_file(fptrimg, &status); + + status = 0; + + fits_create_file(&fptreal, testfitsreal, &status); + fits_create_img(fptreal, DOUBLE_IMG, naxis, naxes, &status); +// fits_write_img(fptreal, TDOUBLE, fpixel, nelements, image_real, &status); + fits_close_file(fptreal, &status); + #endif + +// pFilereal = fopen (fftfile2,"wb"); +// pFileimg = fopen (fftfile3,"wb"); + +// fclose(pFilereal); +// fclose(pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); @@ -947,19 +992,41 @@ if(rank == 0){ #endif if(isector == rank) { - printf("%d writing\n",isector); - pFilereal = fopen (fftfile2,"ab"); - pFileimg = fopen (fftfile3,"ab"); - - long global_index = isector*(xaxis*yaxis)*sizeof(double); + #ifdef FITSIO - fseek(pFilereal, global_index, SEEK_SET); - fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal); - fseek(pFileimg, global_index, SEEK_SET); - fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg); - - fclose(pFilereal); - fclose(pFileimg); + printf("%d writing\n",isector); + long * fpixel = (long *) malloc(sizeof(long)*naxis); + long * lpixel = (long *) malloc(sizeof(long)*naxis); + + fpixel[0] = 1; + lpixel[0] = xaxis; + fpixel[1] = isector*yaxis+1; + lpixel[1] = (isector+1)*yaxis; + + status = 0; + fits_open_image(&fptreal, testfitsreal, READWRITE, &status); + fits_write_subset(fptreal, TDOUBLE, fpixel, lpixel, image_real, &status); + fits_close_file(fptreal, &status); + + status = 0; + fits_open_image(&fptrimg, testfitsimag, READWRITE, &status); + fits_write_subset(fptrimg, TDOUBLE, fpixel, lpixel, image_imag, &status); + fits_close_file(fptrimg, &status); + + #endif + +// pFilereal = fopen (fftfile2,"ab"); +// pFileimg = fopen (fftfile3,"ab"); + +// long global_index = isector*(xaxis*yaxis)*sizeof(double); + +// fseek(pFilereal, global_index, SEEK_SET); +// fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal); +// fseek(pFileimg, global_index, SEEK_SET); +// fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg); + +// fclose(pFilereal); +// fclose(pFileimg); } } #ifdef USE_MPI -- GitLab From 4760ede661134e6c64d987cb6211f9c7b742de71 Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Tue, 24 Jan 2023 17:01:01 +0100 Subject: [PATCH 05/11] CFITSIO implemented --- w-stacking-fftw-cfitsio.c | 1089 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1089 insertions(+) create mode 100644 w-stacking-fftw-cfitsio.c diff --git a/w-stacking-fftw-cfitsio.c b/w-stacking-fftw-cfitsio.c new file mode 100644 index 0000000..ebba5b3 --- /dev/null +++ b/w-stacking-fftw-cfitsio.c @@ -0,0 +1,1089 @@ +#include +#include +#include +#ifdef FITSIO +#include "/m100/home/userexternal/ederubei/cfitsio-3.49/fitsio.h" +#endif +#ifdef USE_MPI +#include +#ifdef USE_FFTW +#include +#endif +#endif +#include +#include +#include +#include +#ifdef ACCOMP +#include "w-stacking_omp.h" +#else +#include "w-stacking.h" +#endif +#define PI 3.14159265359 +#define NUM_OF_SECTORS -1 +#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) +#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) +#define NOVERBOSE +#define NFILES 100 +#define FILENAMELENGTH 30 + +// Linked List set-up +struct sectorlist { + long index; + struct sectorlist * next; +}; + +void Push(struct sectorlist** headRef, long data) { + struct sectorlist* newNode = malloc(sizeof(struct sectorlist)); + newNode->index = data; + newNode->next = *headRef; + *headRef = newNode; +} + +// Main Code +int main(int argc, char * argv[]) +{ + // Main MPI parameters + int rank; + int size; + + + // Define main filenames + FILE * pFile; + FILE * pFile1; + FILE * pFilereal; + FILE * pFileimg; + // Global filename to be composed + char filename[1000]; + + // MS paths + char datapath[900]; + char datapath_multi[NFILES][900]; + char ufile[FILENAMELENGTH] = "ucoord.bin"; + char vfile[FILENAMELENGTH] = "vcoord.bin"; + char wfile[FILENAMELENGTH] = "wcoord.bin"; + char weightsfile[FILENAMELENGTH] = "weights.bin"; + char visrealfile[FILENAMELENGTH] = "visibilities_real.bin"; + char visimgfile[FILENAMELENGTH] = "visibilities_img.bin"; + char metafile[FILENAMELENGTH] = "meta.txt"; + + // Mesh related files + char outfile[FILENAMELENGTH] = "grid.txt"; + char outfile1[FILENAMELENGTH] = "coords.txt"; + char outfile2[FILENAMELENGTH] = "grid_real.bin"; + char outfile3[FILENAMELENGTH] = "grid_img.bin"; + char fftfile[FILENAMELENGTH] = "fft.txt"; + char fftfile2[FILENAMELENGTH] = "fft_real.bin"; + char fftfile3[FILENAMELENGTH] = "fft_img.bin"; + char logfile[FILENAMELENGTH] = "run.log"; + char extension[FILENAMELENGTH] = ".txt"; + char srank[4]; + char timingfile[FILENAMELENGTH] = "timings.dat"; + + // Visibilities related variables + double * uu; + double * vv; + double * ww; + float * weights; + float * visreal; + float * visimg; + + long Nmeasures,Nmeasures0; + long Nvis,Nvis0; + long Nweights,Nweights0; + long freq_per_chan,freq_per_chan0; + long polarisations,polarisations0; + long Ntimes,Ntimes0; + double dt,dt0; + double thours,thours0; + long baselines,baselines0; + double uvmin,uvmin0; + double uvmax,uvmax0; + double wmin,wmin0; + double wmax,wmax0; + double resolution; + + // Mesh related parameters: global size + int grid_size_x = 2048; + int grid_size_y = 2048; + // Split Mesh size (auto-calculated) + int local_grid_size_x; + int local_grid_size_y; + int xaxis; + int yaxis; + + // Number of planes in the w direction + int num_w_planes = 8; + + // Size of the convoutional kernel support + int w_support = 7; + + // Number of OpenMP threads (input parameter) + int num_threads; + + // Resolution + double dx = 0.5/(double)grid_size_x; + double dw = 0.5/(double)num_w_planes; + // Half support size + double w_supporth = (double)((w_support-1)/2)*dx; + + + // Initialize FITS image parameters + fitsfile *fptreal; + fitsfile *fptrimg; + int status; + long nelements; +// long fpixel, lpixel; + char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; + char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; + + long naxis = 2; + long naxes[2] = { grid_size_x, grid_size_y }; + + nelements = naxes[0] * naxes[1]; + + + + + // Internal profiling parameters + clock_t start, end, start0, startk, endk; + double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time, phase_time; + double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1, kernel_time1, reduce_time1, compose_time1, phase_time1; + double writetime, writetime1; + struct timespec begin, finish, begin0, begink, finishk; + double elapsed; + + // Number of sectors in which the mesh is divided along the v direction + // IF nsectors < 0, nsectors = NUMBER OF MPI RANKS + long nsectors = NUM_OF_SECTORS; + + + /* Get number of threads, exit if not given */ + if (argc == 1) { + fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]); + exit(1); + } + // Set the number of OpenMP threads + num_threads = atoi(argv[1]); + + // Number of threads <= 0 not acceptable + if (num_threads <= 0) { + fprintf(stderr, "Wrong parameter: %s\n\n", argv[1]); + fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]); + exit(1); + } + + // Internal profiling + clock_gettime(CLOCK_MONOTONIC, &begin0); + start0 = clock(); + +#ifdef USE_MPI + // Intialize MPI environment + MPI_Init(&argc,&argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if(rank == 0)printf("Running with %d MPI tasks\n",size); + #ifdef USE_FFTW + // Initialize FFTW environment + fftw_mpi_init(); + #endif +#else + // If MPI is not used the two parameters are set for consistency + rank = 0; + size = 1; +#endif + + if(rank == 0)printf("Running with %d threads\n",num_threads); + +#ifdef ACCOMP +if(rank == 0){ + if (0 == omp_get_num_devices()) { + printf("No accelerator found ... exit\n"); + exit(255); + } + printf("Number of available GPUs %d\n", omp_get_num_devices()); + #ifdef NVIDIA + prtAccelInfo(); + #endif + } +#endif + + // set the local size of the image + local_grid_size_x = grid_size_x; + if (nsectors < 0) nsectors = size; + local_grid_size_y = grid_size_y/nsectors; + //nsectors = size; + + // LOCAL grid size + xaxis = local_grid_size_x; + yaxis = local_grid_size_y; + clock_gettime(CLOCK_MONOTONIC, &begin); + start = clock(); + + // INPUT FILES (only the first ndatasets entries are used) + int ndatasets = 1; + strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); + //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); + //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); + //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); + + strcpy(datapath,datapath_multi[0]); + // Read metadata + strcpy(filename,datapath); + strcat(filename,metafile); + pFile = fopen (filename,"r"); + fscanf(pFile,"%ld",&Nmeasures); + fscanf(pFile,"%ld",&Nvis); + fscanf(pFile,"%ld",&freq_per_chan); + fscanf(pFile,"%ld",&polarisations); + fscanf(pFile,"%ld",&Ntimes); + fscanf(pFile,"%lf",&dt); + fscanf(pFile,"%lf",&thours); + fscanf(pFile,"%ld",&baselines); + fscanf(pFile,"%lf",&uvmin); + fscanf(pFile,"%lf",&uvmax); + fscanf(pFile,"%lf",&wmin); + fscanf(pFile,"%lf",&wmax); + fclose(pFile); + + + // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + int nsub = 1000; + //int nsub = 10; + printf("Subtracting last %d measurements\n",nsub); + Nmeasures = Nmeasures-nsub; + Nvis = Nmeasures*freq_per_chan*polarisations; + + // calculate the coordinates of the center + double uvshift = uvmin/(uvmax-uvmin); + //printf("UVSHIFT %f %f %f %f %f\n",uvmin, uvmax, wmin, wmax, uvshift); + + if (rank == 0) + { + printf("N. measurements %ld\n",Nmeasures); + printf("N. visibilities %ld\n",Nvis); + } + + // Set temporary local size of points + long nm_pe = (long)(Nmeasures/size); + long remaining = Nmeasures%size; + + long startrow = rank*nm_pe; + if (rank == size-1)nm_pe = nm_pe+remaining; + + long Nmeasures_tot = Nmeasures; + Nmeasures = nm_pe; + long Nvis_tot = Nvis; + Nvis = Nmeasures*freq_per_chan*polarisations; + Nweights = Nmeasures*polarisations; + +#ifdef VERBOSE + printf("N. measurements on %d %ld\n",rank,Nmeasures); + printf("N. visibilities on %d %ld\n",rank,Nvis); +#endif + + + // DAV: all these arrays can be allocatate statically for the sake of optimization. However be careful that if MPI is used + // all the sizes are rescaled by the number of MPI tasks + // Allocate arrays + uu = (double*) calloc(Nmeasures,sizeof(double)); + vv = (double*) calloc(Nmeasures,sizeof(double)); + ww = (double*) calloc(Nmeasures,sizeof(double)); + weights = (float*) calloc(Nweights,sizeof(float)); + visreal = (float*) calloc(Nvis,sizeof(float)); + visimg = (float*) calloc(Nvis,sizeof(float)); + + if(rank == 0)printf("READING DATA\n"); + // Read data + strcpy(filename,datapath); + strcat(filename,ufile); + //printf("Reading %s\n",filename); + + pFile = fopen (filename,"rb"); + fseek (pFile,startrow*sizeof(double),SEEK_SET); + fread(uu,Nmeasures*sizeof(double),1,pFile); + fclose(pFile); + + strcpy(filename,datapath); + strcat(filename,vfile); + //printf("Reading %s\n",filename); + + pFile = fopen (filename,"rb"); + fseek (pFile,startrow*sizeof(double),SEEK_SET); + fread(vv,Nmeasures*sizeof(double),1,pFile); + fclose(pFile); + + strcpy(filename,datapath); + strcat(filename,wfile); + //printf("Reading %s\n",filename); + + pFile = fopen (filename,"rb"); + fseek (pFile,startrow*sizeof(double),SEEK_SET); + fread(ww,Nmeasures*sizeof(double),1,pFile); + fclose(pFile); + +#ifdef USE_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + + clock_gettime(CLOCK_MONOTONIC, &finish); + end = clock(); + setup_time = ((double) (end - start)) / CLOCKS_PER_SEC; + setup_time1 = (finish.tv_sec - begin.tv_sec); + setup_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; + + + if(rank == 0)printf("GRIDDING DATA\n"); + + // Create histograms and linked lists + clock_gettime(CLOCK_MONOTONIC, &begin); + start = clock(); + + // Initialize linked list + struct sectorlist ** sectorhead; + sectorhead = (struct sectorlist **) malloc((nsectors+1) * sizeof(struct sectorlist)); + for (int isec=0; isec<=nsectors; isec++) + { + sectorhead[isec] = malloc(sizeof(struct sectorlist)); + sectorhead[isec]->index = -1; + sectorhead[isec]->next = NULL; + } + + + long * histo_send = (long*) calloc(nsectors+1,sizeof(long)); + int * boundary = (int*) calloc(Nmeasures,sizeof(int)); + double uuh,vvh; + for (long iphi = 0; iphi < Nmeasures; iphi++) + { + boundary[iphi] = -1; + uuh = uu[iphi]; + vvh = vv[iphi]; + int binphi = (int)(vvh*nsectors); + // check if the point influence also neighboring slabs + double updist = (double)((binphi+1)*yaxis)*dx - vvh; + double downdist = vvh - (double)(binphi*yaxis)*dx; + // + histo_send[binphi]++; + Push(§orhead[binphi],iphi); + if(updist < w_supporth && updist >= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(§orhead[binphi+1],iphi);}; + if(downdist < w_supporth && binphi > 0 && downdist >= 0.0) {histo_send[binphi-1]++; boundary[iphi] = binphi-1; Push(§orhead[binphi-1],iphi);}; + } + +#ifdef PIPPO + struct sectorlist * current; + long iiii = 0; + for (int j=0; jindex != -1) + { + printf("%d %d %ld %ld %ld\n",rank,j,iiii,histo_send[j],current->index); + current = current->next; + iiii++; + } + } +#endif + +#ifdef VERBOSE + for (int iii=0; iiiindex != -1) + { + long ilocal = current->index; + //double vvh = vv[ilocal]; + //int binphi = (int)(vvh*nsectors); + //if (binphi == isector || boundary[ilocal] == isector) { + uus[icount] = uu[ilocal]; + vvs[icount] = vv[ilocal]-isector*shift; + wws[icount] = ww[ilocal]; + for (long ipol=0; ipol1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis); + inu++; + } + icount++; + //} + current = current->next; + } + + clock_gettime(CLOCK_MONOTONIC, &finishk); + endk = clock(); + compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; + compose_time1 += (finishk.tv_sec - begink.tv_sec); + compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; + + #ifndef USE_MPI + double uumin = 1e20; + double vvmin = 1e20; + double uumax = -1e20; + double vvmax = -1e20; + + for (long ipart=0; ipart %f\n",gridss[iii]); + + #ifndef USE_MPI + long stride = isector*2*xaxis*yaxis*num_w_planes; + for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii]; + #endif + + // Write grid in the corresponding remote slab + #ifdef USE_MPI + int target_rank = (int)isector; + //int target_rank = (int)(size-isector-1); + #ifdef ONE_SIDE + printf("One Side communication active\n"); + MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin); + MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin); + MPI_Win_unlock(target_rank,slabwin); + //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin); + #else + MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD); + #endif //ONE_SIDE + #endif //USE_MPI + + clock_gettime(CLOCK_MONOTONIC, &finishk); + endk = clock(); + reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; + reduce_time1 += (finishk.tv_sec - begink.tv_sec); + reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; + // Go to next sector + for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0; + + // Deallocate all sector arrays + free(uus); + free(vvs); + free(wws); + free(weightss); + free(visreals); + free(visimgs); + // End of loop over sectors + } + // End of loop over input files + } + + // Finalize MPI communication + #ifdef USE_MPI + MPI_Win_fence(0,slabwin); + #endif + + // Swap left and right parts APPARENTLY NOT NECESSARY + /* + for (long kswap=0; kswap 1) + { + for (int iw=0; iw n0) + // and perform the FFT per w plane + alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); + fftwgrid = fftw_alloc_complex(alloc_local); + plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); + + long fftwindex = 0; + long fftwindex2D = 0; + for (int iw=0; iw 1) + { + for (int iw=0; iw 1) + { + printf("PSetup time: %f sec\n",setup_time1); + printf("PProcess time: %f sec\n",process_time1); + printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",kernel_time1,compose_time1,reduce_time1); +#ifdef USE_FFTW + printf("PFFTW time: %f sec\n",fftw_time1); + printf("PPhase time: %f sec\n",phase_time1); +#endif + printf("PTOT time: %f sec\n",tot_time1); + } + } + + if (rank == 0) + { + pFile = fopen (timingfile,"w"); + if (num_threads == 1) + { + fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time,kernel_time,compose_time,reduce_time,fftw_time,phase_time,tot_time); + } else { + fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time1,kernel_time1,compose_time1,reduce_time1,fftw_time1,phase_time1,tot_time1); + } + fclose(pFile); + } + + // Close MPI environment + #ifdef USE_MPI + MPI_Win_fence(0,slabwin); + MPI_Win_free(&slabwin); + MPI_Finalize(); + #endif +} -- GitLab From 9aed31901a0c13cc9430936cee0a61ea11043a10 Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Sun, 29 Jan 2023 09:02:37 +0000 Subject: [PATCH 06/11] Delete w-stacking-fftw-cfitsio.c --- w-stacking-fftw-cfitsio.c | 1089 ------------------------------------- 1 file changed, 1089 deletions(-) delete mode 100644 w-stacking-fftw-cfitsio.c diff --git a/w-stacking-fftw-cfitsio.c b/w-stacking-fftw-cfitsio.c deleted file mode 100644 index ebba5b3..0000000 --- a/w-stacking-fftw-cfitsio.c +++ /dev/null @@ -1,1089 +0,0 @@ -#include -#include -#include -#ifdef FITSIO -#include "/m100/home/userexternal/ederubei/cfitsio-3.49/fitsio.h" -#endif -#ifdef USE_MPI -#include -#ifdef USE_FFTW -#include -#endif -#endif -#include -#include -#include -#include -#ifdef ACCOMP -#include "w-stacking_omp.h" -#else -#include "w-stacking.h" -#endif -#define PI 3.14159265359 -#define NUM_OF_SECTORS -1 -#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) -#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) -#define NOVERBOSE -#define NFILES 100 -#define FILENAMELENGTH 30 - -// Linked List set-up -struct sectorlist { - long index; - struct sectorlist * next; -}; - -void Push(struct sectorlist** headRef, long data) { - struct sectorlist* newNode = malloc(sizeof(struct sectorlist)); - newNode->index = data; - newNode->next = *headRef; - *headRef = newNode; -} - -// Main Code -int main(int argc, char * argv[]) -{ - // Main MPI parameters - int rank; - int size; - - - // Define main filenames - FILE * pFile; - FILE * pFile1; - FILE * pFilereal; - FILE * pFileimg; - // Global filename to be composed - char filename[1000]; - - // MS paths - char datapath[900]; - char datapath_multi[NFILES][900]; - char ufile[FILENAMELENGTH] = "ucoord.bin"; - char vfile[FILENAMELENGTH] = "vcoord.bin"; - char wfile[FILENAMELENGTH] = "wcoord.bin"; - char weightsfile[FILENAMELENGTH] = "weights.bin"; - char visrealfile[FILENAMELENGTH] = "visibilities_real.bin"; - char visimgfile[FILENAMELENGTH] = "visibilities_img.bin"; - char metafile[FILENAMELENGTH] = "meta.txt"; - - // Mesh related files - char outfile[FILENAMELENGTH] = "grid.txt"; - char outfile1[FILENAMELENGTH] = "coords.txt"; - char outfile2[FILENAMELENGTH] = "grid_real.bin"; - char outfile3[FILENAMELENGTH] = "grid_img.bin"; - char fftfile[FILENAMELENGTH] = "fft.txt"; - char fftfile2[FILENAMELENGTH] = "fft_real.bin"; - char fftfile3[FILENAMELENGTH] = "fft_img.bin"; - char logfile[FILENAMELENGTH] = "run.log"; - char extension[FILENAMELENGTH] = ".txt"; - char srank[4]; - char timingfile[FILENAMELENGTH] = "timings.dat"; - - // Visibilities related variables - double * uu; - double * vv; - double * ww; - float * weights; - float * visreal; - float * visimg; - - long Nmeasures,Nmeasures0; - long Nvis,Nvis0; - long Nweights,Nweights0; - long freq_per_chan,freq_per_chan0; - long polarisations,polarisations0; - long Ntimes,Ntimes0; - double dt,dt0; - double thours,thours0; - long baselines,baselines0; - double uvmin,uvmin0; - double uvmax,uvmax0; - double wmin,wmin0; - double wmax,wmax0; - double resolution; - - // Mesh related parameters: global size - int grid_size_x = 2048; - int grid_size_y = 2048; - // Split Mesh size (auto-calculated) - int local_grid_size_x; - int local_grid_size_y; - int xaxis; - int yaxis; - - // Number of planes in the w direction - int num_w_planes = 8; - - // Size of the convoutional kernel support - int w_support = 7; - - // Number of OpenMP threads (input parameter) - int num_threads; - - // Resolution - double dx = 0.5/(double)grid_size_x; - double dw = 0.5/(double)num_w_planes; - // Half support size - double w_supporth = (double)((w_support-1)/2)*dx; - - - // Initialize FITS image parameters - fitsfile *fptreal; - fitsfile *fptrimg; - int status; - long nelements; -// long fpixel, lpixel; - char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; - char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; - - long naxis = 2; - long naxes[2] = { grid_size_x, grid_size_y }; - - nelements = naxes[0] * naxes[1]; - - - - - // Internal profiling parameters - clock_t start, end, start0, startk, endk; - double setup_time, process_time, mpi_time, fftw_time, tot_time, kernel_time, reduce_time, compose_time, phase_time; - double setup_time1, process_time1, mpi_time1, fftw_time1, tot_time1, kernel_time1, reduce_time1, compose_time1, phase_time1; - double writetime, writetime1; - struct timespec begin, finish, begin0, begink, finishk; - double elapsed; - - // Number of sectors in which the mesh is divided along the v direction - // IF nsectors < 0, nsectors = NUMBER OF MPI RANKS - long nsectors = NUM_OF_SECTORS; - - - /* Get number of threads, exit if not given */ - if (argc == 1) { - fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]); - exit(1); - } - // Set the number of OpenMP threads - num_threads = atoi(argv[1]); - - // Number of threads <= 0 not acceptable - if (num_threads <= 0) { - fprintf(stderr, "Wrong parameter: %s\n\n", argv[1]); - fprintf(stderr, "Usage: %s number_of_OMP_Threads \n", argv[0]); - exit(1); - } - - // Internal profiling - clock_gettime(CLOCK_MONOTONIC, &begin0); - start0 = clock(); - -#ifdef USE_MPI - // Intialize MPI environment - MPI_Init(&argc,&argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if(rank == 0)printf("Running with %d MPI tasks\n",size); - #ifdef USE_FFTW - // Initialize FFTW environment - fftw_mpi_init(); - #endif -#else - // If MPI is not used the two parameters are set for consistency - rank = 0; - size = 1; -#endif - - if(rank == 0)printf("Running with %d threads\n",num_threads); - -#ifdef ACCOMP -if(rank == 0){ - if (0 == omp_get_num_devices()) { - printf("No accelerator found ... exit\n"); - exit(255); - } - printf("Number of available GPUs %d\n", omp_get_num_devices()); - #ifdef NVIDIA - prtAccelInfo(); - #endif - } -#endif - - // set the local size of the image - local_grid_size_x = grid_size_x; - if (nsectors < 0) nsectors = size; - local_grid_size_y = grid_size_y/nsectors; - //nsectors = size; - - // LOCAL grid size - xaxis = local_grid_size_x; - yaxis = local_grid_size_y; - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - - // INPUT FILES (only the first ndatasets entries are used) - int ndatasets = 1; - strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); - //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/newgauss4_t201806301100_SBL180.binMS/"); - //strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); - //strcpy(datapath_multi[1],"/m100_scratch/userexternal/cgheller/gridding/Lofar/L798046_SB244_uv.uncorr_130B27932t_134MHz.pre-cal.binMS/"); - - strcpy(datapath,datapath_multi[0]); - // Read metadata - strcpy(filename,datapath); - strcat(filename,metafile); - pFile = fopen (filename,"r"); - fscanf(pFile,"%ld",&Nmeasures); - fscanf(pFile,"%ld",&Nvis); - fscanf(pFile,"%ld",&freq_per_chan); - fscanf(pFile,"%ld",&polarisations); - fscanf(pFile,"%ld",&Ntimes); - fscanf(pFile,"%lf",&dt); - fscanf(pFile,"%lf",&thours); - fscanf(pFile,"%ld",&baselines); - fscanf(pFile,"%lf",&uvmin); - fscanf(pFile,"%lf",&uvmax); - fscanf(pFile,"%lf",&wmin); - fscanf(pFile,"%lf",&wmax); - fclose(pFile); - - - // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - int nsub = 1000; - //int nsub = 10; - printf("Subtracting last %d measurements\n",nsub); - Nmeasures = Nmeasures-nsub; - Nvis = Nmeasures*freq_per_chan*polarisations; - - // calculate the coordinates of the center - double uvshift = uvmin/(uvmax-uvmin); - //printf("UVSHIFT %f %f %f %f %f\n",uvmin, uvmax, wmin, wmax, uvshift); - - if (rank == 0) - { - printf("N. measurements %ld\n",Nmeasures); - printf("N. visibilities %ld\n",Nvis); - } - - // Set temporary local size of points - long nm_pe = (long)(Nmeasures/size); - long remaining = Nmeasures%size; - - long startrow = rank*nm_pe; - if (rank == size-1)nm_pe = nm_pe+remaining; - - long Nmeasures_tot = Nmeasures; - Nmeasures = nm_pe; - long Nvis_tot = Nvis; - Nvis = Nmeasures*freq_per_chan*polarisations; - Nweights = Nmeasures*polarisations; - -#ifdef VERBOSE - printf("N. measurements on %d %ld\n",rank,Nmeasures); - printf("N. visibilities on %d %ld\n",rank,Nvis); -#endif - - - // DAV: all these arrays can be allocatate statically for the sake of optimization. However be careful that if MPI is used - // all the sizes are rescaled by the number of MPI tasks - // Allocate arrays - uu = (double*) calloc(Nmeasures,sizeof(double)); - vv = (double*) calloc(Nmeasures,sizeof(double)); - ww = (double*) calloc(Nmeasures,sizeof(double)); - weights = (float*) calloc(Nweights,sizeof(float)); - visreal = (float*) calloc(Nvis,sizeof(float)); - visimg = (float*) calloc(Nvis,sizeof(float)); - - if(rank == 0)printf("READING DATA\n"); - // Read data - strcpy(filename,datapath); - strcat(filename,ufile); - //printf("Reading %s\n",filename); - - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*sizeof(double),SEEK_SET); - fread(uu,Nmeasures*sizeof(double),1,pFile); - fclose(pFile); - - strcpy(filename,datapath); - strcat(filename,vfile); - //printf("Reading %s\n",filename); - - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*sizeof(double),SEEK_SET); - fread(vv,Nmeasures*sizeof(double),1,pFile); - fclose(pFile); - - strcpy(filename,datapath); - strcat(filename,wfile); - //printf("Reading %s\n",filename); - - pFile = fopen (filename,"rb"); - fseek (pFile,startrow*sizeof(double),SEEK_SET); - fread(ww,Nmeasures*sizeof(double),1,pFile); - fclose(pFile); - -#ifdef USE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - - clock_gettime(CLOCK_MONOTONIC, &finish); - end = clock(); - setup_time = ((double) (end - start)) / CLOCKS_PER_SEC; - setup_time1 = (finish.tv_sec - begin.tv_sec); - setup_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; - - - if(rank == 0)printf("GRIDDING DATA\n"); - - // Create histograms and linked lists - clock_gettime(CLOCK_MONOTONIC, &begin); - start = clock(); - - // Initialize linked list - struct sectorlist ** sectorhead; - sectorhead = (struct sectorlist **) malloc((nsectors+1) * sizeof(struct sectorlist)); - for (int isec=0; isec<=nsectors; isec++) - { - sectorhead[isec] = malloc(sizeof(struct sectorlist)); - sectorhead[isec]->index = -1; - sectorhead[isec]->next = NULL; - } - - - long * histo_send = (long*) calloc(nsectors+1,sizeof(long)); - int * boundary = (int*) calloc(Nmeasures,sizeof(int)); - double uuh,vvh; - for (long iphi = 0; iphi < Nmeasures; iphi++) - { - boundary[iphi] = -1; - uuh = uu[iphi]; - vvh = vv[iphi]; - int binphi = (int)(vvh*nsectors); - // check if the point influence also neighboring slabs - double updist = (double)((binphi+1)*yaxis)*dx - vvh; - double downdist = vvh - (double)(binphi*yaxis)*dx; - // - histo_send[binphi]++; - Push(§orhead[binphi],iphi); - if(updist < w_supporth && updist >= 0.0) {histo_send[binphi+1]++; boundary[iphi] = binphi+1; Push(§orhead[binphi+1],iphi);}; - if(downdist < w_supporth && binphi > 0 && downdist >= 0.0) {histo_send[binphi-1]++; boundary[iphi] = binphi-1; Push(§orhead[binphi-1],iphi);}; - } - -#ifdef PIPPO - struct sectorlist * current; - long iiii = 0; - for (int j=0; jindex != -1) - { - printf("%d %d %ld %ld %ld\n",rank,j,iiii,histo_send[j],current->index); - current = current->next; - iiii++; - } - } -#endif - -#ifdef VERBOSE - for (int iii=0; iiiindex != -1) - { - long ilocal = current->index; - //double vvh = vv[ilocal]; - //int binphi = (int)(vvh*nsectors); - //if (binphi == isector || boundary[ilocal] == isector) { - uus[icount] = uu[ilocal]; - vvs[icount] = vv[ilocal]-isector*shift; - wws[icount] = ww[ilocal]; - for (long ipol=0; ipol1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis); - inu++; - } - icount++; - //} - current = current->next; - } - - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - compose_time1 += (finishk.tv_sec - begink.tv_sec); - compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; - - #ifndef USE_MPI - double uumin = 1e20; - double vvmin = 1e20; - double uumax = -1e20; - double vvmax = -1e20; - - for (long ipart=0; ipart %f\n",gridss[iii]); - - #ifndef USE_MPI - long stride = isector*2*xaxis*yaxis*num_w_planes; - for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii]; - #endif - - // Write grid in the corresponding remote slab - #ifdef USE_MPI - int target_rank = (int)isector; - //int target_rank = (int)(size-isector-1); - #ifdef ONE_SIDE - printf("One Side communication active\n"); - MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin); - MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin); - MPI_Win_unlock(target_rank,slabwin); - //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin); - #else - MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD); - #endif //ONE_SIDE - #endif //USE_MPI - - clock_gettime(CLOCK_MONOTONIC, &finishk); - endk = clock(); - reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC; - reduce_time1 += (finishk.tv_sec - begink.tv_sec); - reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0; - // Go to next sector - for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0; - - // Deallocate all sector arrays - free(uus); - free(vvs); - free(wws); - free(weightss); - free(visreals); - free(visimgs); - // End of loop over sectors - } - // End of loop over input files - } - - // Finalize MPI communication - #ifdef USE_MPI - MPI_Win_fence(0,slabwin); - #endif - - // Swap left and right parts APPARENTLY NOT NECESSARY - /* - for (long kswap=0; kswap 1) - { - for (int iw=0; iw n0) - // and perform the FFT per w plane - alloc_local = fftw_mpi_local_size_2d(grid_size_y, grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); - fftwgrid = fftw_alloc_complex(alloc_local); - plan = fftw_mpi_plan_dft_2d(grid_size_y, grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); - - long fftwindex = 0; - long fftwindex2D = 0; - for (int iw=0; iw 1) - { - for (int iw=0; iw 1) - { - printf("PSetup time: %f sec\n",setup_time1); - printf("PProcess time: %f sec\n",process_time1); - printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",kernel_time1,compose_time1,reduce_time1); -#ifdef USE_FFTW - printf("PFFTW time: %f sec\n",fftw_time1); - printf("PPhase time: %f sec\n",phase_time1); -#endif - printf("PTOT time: %f sec\n",tot_time1); - } - } - - if (rank == 0) - { - pFile = fopen (timingfile,"w"); - if (num_threads == 1) - { - fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time,kernel_time,compose_time,reduce_time,fftw_time,phase_time,tot_time); - } else { - fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time1,kernel_time1,compose_time1,reduce_time1,fftw_time1,phase_time1,tot_time1); - } - fclose(pFile); - } - - // Close MPI environment - #ifdef USE_MPI - MPI_Win_fence(0,slabwin); - MPI_Win_free(&slabwin); - MPI_Finalize(); - #endif -} -- GitLab From 736c2f2adce1de3ea5560cbb05d37f5a8a3486c6 Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Sun, 29 Jan 2023 09:32:15 +0000 Subject: [PATCH 07/11] Full working CFITSIO parallel fits writing implementation --- w-stacking-fftw.c | 54 +++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index 6e00e73..a2d8151 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -122,8 +122,8 @@ int main(int argc, char * argv[]) int num_threads; // Resolution - double dx = 0.5/(double)grid_size_x; - double dw = 0.5/(double)num_w_planes; + double dx = 1.0/(double)grid_size_x; + double dw = 1.0/(double)num_w_planes; // Half support size double w_supporth = (double)((w_support-1)/2)*dx; @@ -133,9 +133,9 @@ int main(int argc, char * argv[]) fitsfile *fptrimg; int status; long nelements; - long fpixel, lpixel; - char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; - char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; +// long fpixel, lpixel; + char testfitsreal[FILENAMELENGTH] = "parallel_np4_real.fits"; + char testfitsimag[FILENAMELENGTH] = "parallel_np4_img.fits"; long naxis = 2; long naxes[2] = { grid_size_x, grid_size_y }; @@ -217,7 +217,6 @@ if(rank == 0){ // LOCAL grid size xaxis = local_grid_size_x; yaxis = local_grid_size_y; - clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); @@ -975,34 +974,39 @@ if(rank == 0){ fits_close_file(fptreal, &status); #endif -// pFilereal = fopen (fftfile2,"wb"); -// pFileimg = fopen (fftfile3,"wb"); - -// fclose(pFilereal); -// fclose(pFileimg); + pFilereal = fopen (fftfile2, "wb"); + pFileimg = fopen (fftfile3, "wb"); + fclose(pFilereal); + fclose(pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); + long * fpixel = (long *) malloc(sizeof(long)*naxis); + long * lpixel = (long *) malloc(sizeof(long)*naxis); + for (int isector=0; isector Date: Sun, 29 Jan 2023 13:26:17 +0100 Subject: [PATCH 08/11] Full working parallel fits writing with CFITSIO --- w-stacking-fftw.c | 51 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index a2d8151..2fe8a6f 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -985,28 +985,65 @@ if(rank == 0){ if(rank == 0)printf("WRITING IMAGE\n"); long * fpixel = (long *) malloc(sizeof(long)*naxis); long * lpixel = (long *) malloc(sizeof(long)*naxis); - + + #ifdef USE_MPI + MPI_Barrier(MPI_COMM_WORLD); + #endif + #ifdef PARALLEL_FITS + #ifdef FITSIO + + fpixel[0] = 1; + fpixel[1] = rank*yaxis+1; + lpixel[0] = xaxis; + lpixel[1] = (rank+1)*yaxis; + + status = 0; + fits_open_image(&fptreal, testfitsreal, READWRITE, &status); + fits_write_subset(fptreal, TDOUBLE, fpixel, lpixel, image_real, &status); + fits_close_file(fptreal, &status); + + status = 0; + fits_open_image(&fptrimg, testfitsimag, READWRITE, &status); + fits_write_subset(fptrimg, TDOUBLE, fpixel, lpixel, image_imag, &status); + fits_close_file(fptrimg, &status); + + #endif + + pFilereal = fopen (fftfile2,"ab"); + pFileimg = fopen (fftfile3,"ab"); + + long global_index = rank*(xaxis*yaxis)*sizeof(double); + + fseek(pFilereal, global_index, SEEK_SET); + fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal); + fseek(pFileimg, global_index, SEEK_SET); + fwrite(image_imag, xaxis*yaxis, sizeof(double), pFileimg); + + fclose(pFilereal); + fclose(pFileimg); + + #ifdef USE_MPI + MPI_Barrier(MPI_COMM_WORLD); + #endif + #else for (int isector=0; isector Date: Sun, 29 Jan 2023 13:27:50 +0100 Subject: [PATCH 09/11] Makefile with PARALLEL_FITS implementation --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c034046..472d5ff 100644 --- a/Makefile +++ b/Makefile @@ -33,9 +33,11 @@ endif # write the final image OPT += -DWRITE_IMAGE # perform w-stacking phase correction -#OPT += -DPHASE_ON +OPT += -DPHASE_ON # Support CFITSIO OPT += -DFITSIO +# Perform true parallel fits writing +OPT += -DPARALLEL_FITS ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio -- GitLab From 48732cc6eac1b9690c814d20b68fb088e80d356e Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Wed, 1 Feb 2023 10:36:26 +0100 Subject: [PATCH 10/11] PARALLELIO implementation --- Makefile | 2 +- w-stacking-fftw.c | 31 +++++++++++-------------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 472d5ff..928bb59 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ OPT += -DPHASE_ON # Support CFITSIO OPT += -DFITSIO # Perform true parallel fits writing -OPT += -DPARALLEL_FITS +OPT += -DPARALLELIO ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index 2fe8a6f..93e520d 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -2,7 +2,7 @@ #include #include #ifdef FITSIO -#include "/m100/home/userexternal/ederubei/cfitsio-3.49/fitsio.h" +#include "fitsio.h" #endif #ifdef USE_MPI #include @@ -104,8 +104,8 @@ int main(int argc, char * argv[]) double resolution; // Mesh related parameters: global size - int grid_size_x = 2048; - int grid_size_y = 2048; + int grid_size_x = 4096; + int grid_size_y = 4096; // Split Mesh size (auto-calculated) int local_grid_size_x; int local_grid_size_y; @@ -132,17 +132,12 @@ int main(int argc, char * argv[]) fitsfile *fptreal; fitsfile *fptrimg; int status; - long nelements; -// long fpixel, lpixel; - char testfitsreal[FILENAMELENGTH] = "parallel_np4_real.fits"; - char testfitsimag[FILENAMELENGTH] = "parallel_np4_img.fits"; + char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; + char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; long naxis = 2; long naxes[2] = { grid_size_x, grid_size_y }; - nelements = naxes[0] * naxes[1]; - - // Internal profiling parameters @@ -958,19 +953,17 @@ if(rank == 0){ remove(testfitsimag); - printf("FITS CREATING\n"); + printf("FITS CREATION\n"); status = 0; fits_create_file(&fptrimg, testfitsimag, &status); fits_create_img(fptrimg, DOUBLE_IMG, naxis, naxes, &status); -// fits_write_img(fptrimg, TDOUBLE, fpixel, nelements, image_imag, &status); fits_close_file(fptrimg, &status); status = 0; fits_create_file(&fptreal, testfitsreal, &status); fits_create_img(fptreal, DOUBLE_IMG, naxis, naxes, &status); -// fits_write_img(fptreal, TDOUBLE, fpixel, nelements, image_real, &status); fits_close_file(fptreal, &status); #endif @@ -989,7 +982,7 @@ if(rank == 0){ #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif - #ifdef PARALLEL_FITS + #ifdef PARALLELIO #ifdef FITSIO fpixel[0] = 1; @@ -1007,7 +1000,7 @@ if(rank == 0){ fits_write_subset(fptrimg, TDOUBLE, fpixel, lpixel, image_imag, &status); fits_close_file(fptrimg, &status); - #endif + #endif //FITSIO pFilereal = fopen (fftfile2,"ab"); pFileimg = fopen (fftfile3,"ab"); @@ -1036,9 +1029,7 @@ if(rank == 0){ #ifdef FITSIO printf("%d writing\n",isector); - //long * fpixel = (long *) malloc(sizeof(long)*naxis); - //long * lpixel = (long *) malloc(sizeof(long)*naxis); - + fpixel[0] = 1; fpixel[1] = isector*yaxis+1; lpixel[0] = xaxis; @@ -1054,7 +1045,7 @@ if(rank == 0){ fits_write_subset(fptrimg, TDOUBLE, fpixel, lpixel, image_imag, &status); fits_close_file(fptrimg, &status); - #endif + #endif //FITSIO pFilereal = fopen (fftfile2,"ab"); pFileimg = fopen (fftfile3,"ab"); @@ -1070,7 +1061,7 @@ if(rank == 0){ fclose(pFileimg); } } - #endif + #endif //PARALLELIO #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif -- GitLab From b48befbc99c9b97c3eb78966eebf6abd18739d39 Mon Sep 17 00:00:00 2001 From: Emanuele De Rubeis Date: Wed, 1 Feb 2023 15:26:57 +0100 Subject: [PATCH 11/11] PARALLELIO and CFITSIO implementation --- Makefile | 4 ++-- w-stacking-fftw.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 928bb59..f632e6e 100644 --- a/Makefile +++ b/Makefile @@ -36,8 +36,8 @@ OPT += -DWRITE_IMAGE OPT += -DPHASE_ON # Support CFITSIO OPT += -DFITSIO -# Perform true parallel fits writing -OPT += -DPARALLELIO +# Perform true parallel images writing +#OPT += -DPARALLELIO ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index 93e520d..55244ba 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -104,8 +104,8 @@ int main(int argc, char * argv[]) double resolution; // Mesh related parameters: global size - int grid_size_x = 4096; - int grid_size_y = 4096; + int grid_size_x = 2048; + int grid_size_y = 2048; // Split Mesh size (auto-calculated) int local_grid_size_x; int local_grid_size_y; -- GitLab