Loading Makefile +5 −7 Original line number Diff line number Diff line Loading @@ -33,22 +33,18 @@ OPT += -DNVIDIA # write the final image OPT += -DWRITE_IMAGE # perform w-stacking phase correction <<<<<<< HEAD OPT += -DPHASE_ON # GPU support for FFT using cuFFTMP OPT += -DCUDA_FFT # Support CFITSIO ======= #OPT += -DPHASE_ON # Support CFITSIO !!! Remember to add the path to the CFITSIO library to LD_LIBRARY_PATH >>>>>>> main #OPT += -DFITSIO # Perform true parallel images writing #OPT += -DPARALLELIO # Normalize uvw in case it is not done in the binMS OPT += -DNORMALIZE_UVW #OPT += -DNORMALIZE_UVW # Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL OPT += -DGAUSS_HI_PRECISION OPT += -DGAUSS #OPT += -DVERBOSE ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio Loading Loading @@ -100,3 +96,5 @@ clean: rm *.o rm w-stacking.c rm phase_correction.c # module load spectrum_mpi/10.3.1--binary && module load fftw && module load cuda/11.3 && module load profile/candidate && module load hpc-sdk/2022--binary && LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/m100_work/IscrC_CD-DLS/cfitsio-3.49 \ # && LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/cineca/prod/opt/compilers/hpc-sdk/2022/binary/Linux_ppc64le/2022/math_libs/11.8/lib64 && export LD_LIBRARY_PATH phase_correction.cu +1 −1 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in // WARNING: nbucket MUST be chosen such that xaxis*yaxis*num_w_planes is a multiple of nbucket int nbucket = 1; int Nth = NTHREADS; long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; long Nbl = (long)((num_w_planes*xaxis*yaxis)/nbucket/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); Loading w-stacking-fftw.c +13 −21 Original line number Diff line number Diff line Loading @@ -106,8 +106,8 @@ int main(int argc, char * argv[]) double resolution; // Mesh related parameters: global size int grid_size_x = 4096; int grid_size_y = 4096; int grid_size_x = 8192; int grid_size_y = 8192; // Split Mesh size (auto-calculated) int local_grid_size_x; int local_grid_size_y; Loading @@ -115,7 +115,7 @@ int main(int argc, char * argv[]) int yaxis; // Number of planes in the w direction int num_w_planes = 1; int num_w_planes = 2; // Size of the convoutional kernel support int w_support = 7; Loading @@ -131,23 +131,13 @@ int main(int argc, char * argv[]) // Initialize FITS image parameters <<<<<<< HEAD #ifdef FITSIO fitsfile *fptreal; fitsfile *fptrimg; int status; char testfitsreal[FILENAMELENGTH] = "parallel_np8_real.fits"; char testfitsimag[FILENAMELENGTH] = "parallel_np8_img.fits"; ======= #ifdef FITSIO fitsfile *fptreal; fitsfile *fptrimg; int status; char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; #endif >>>>>>> main char testfitsreal[FILENAMELENGTH] = "parallel_real.fits"; char testfitsimag[FILENAMELENGTH] = "parallel_img.fits"; long naxis = 2; long naxes[2] = { grid_size_x, grid_size_y }; Loading Loading @@ -241,7 +231,9 @@ if(rank == 0){ // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/SKA3/ZW2_IFRQ_0-1-5.binMS/"); // strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/SKA3/ZW2_IFRQ_0-1-5.binMS/"); strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]); // Read metadata Loading @@ -266,7 +258,7 @@ if(rank == 0){ // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! int nsub = 1000; //int nsub = 10; if(rank == 0)printf("Subtracting last %d measurements\n",nsub); printf("Subtracting last %d measurements\n",nsub); Nmeasures = Nmeasures-nsub; Nvis = Nmeasures*freq_per_chan*polarisations; Loading Loading @@ -504,7 +496,7 @@ if(rank == 0){ for (int ifiles=0; ifiles<ndatasets; ifiles++) { strcpy(filename,datapath_multi[ifiles]); if(rank == 0)printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets); printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets); // Read metadata strcpy(filename,datapath); Loading @@ -528,7 +520,7 @@ if(rank == 0){ resolution = 1.0/MAX(abs(uvmin),abs(uvmax)); // calculate the resolution in arcsec double resolution_asec = (3600.0*180.0)/MAX(abs(uvmin),abs(uvmax))/PI; if(rank == 0)printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); strcpy(filename,datapath); strcat(filename,weightsfile); Loading w-stacking.cu +9 −5 Original line number Diff line number Diff line Loading @@ -59,7 +59,7 @@ void makeKaiserBesselKernel(double * kernel, double overSamplingFactor, int withSinc) { int n = increaseprecision*KernelLen, mid = n / 2; double * sincKernel = malloc((mid + 1)*sizeof(*sincKernel)); double * sincKernel = (double*)malloc((mid + 1) * sizeof(*sincKernel)); const double filterRatio = 1.0 / overSamplingFactor; sincKernel[0] = filterRatio; for (int i = 1; i != mid + 1; i++) { Loading Loading @@ -209,7 +209,7 @@ void wstack( double std = 1.0; double std22 = 1.0/(2.0*std*std); double norm = std22/PI; double * convkernel = malloc(increaseprecision*w_support*sizeof(*convkernel)); double * convkernel = (double*)malloc(increaseprecision*w_support*sizeof(*convkernel)); double overSamplingFactor = 1.0; int withSinc = 0; double alpha = 8.6; Loading Loading @@ -249,7 +249,10 @@ void wstack( mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float)); mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float)); mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);} mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);} mmm=cudaMemcpy(uu_g, uu, num_points*sizeof(double), cudaMemcpyHostToDevice); mmm=cudaMemcpy(vv_g, vv, num_points*sizeof(double), cudaMemcpyHostToDevice); Loading @@ -257,6 +260,7 @@ void wstack( mmm=cudaMemcpy(vis_real_g, vis_real, Nvis*sizeof(float), cudaMemcpyHostToDevice); mmm=cudaMemcpy(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice); mmm=cudaMemcpy(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpy ERROR %d !!!\n", mmm);} // Call main GPU Kernel convolve_g <<<Nbl,Nth>>> ( Loading Loading
Makefile +5 −7 Original line number Diff line number Diff line Loading @@ -33,22 +33,18 @@ OPT += -DNVIDIA # write the final image OPT += -DWRITE_IMAGE # perform w-stacking phase correction <<<<<<< HEAD OPT += -DPHASE_ON # GPU support for FFT using cuFFTMP OPT += -DCUDA_FFT # Support CFITSIO ======= #OPT += -DPHASE_ON # Support CFITSIO !!! Remember to add the path to the CFITSIO library to LD_LIBRARY_PATH >>>>>>> main #OPT += -DFITSIO # Perform true parallel images writing #OPT += -DPARALLELIO # Normalize uvw in case it is not done in the binMS OPT += -DNORMALIZE_UVW #OPT += -DNORMALIZE_UVW # Gridding kernel: GAUSS, GAUSS_HI_PRECISION, KAISERBESSEL OPT += -DGAUSS_HI_PRECISION OPT += -DGAUSS #OPT += -DVERBOSE ifeq (FITSIO,$(findstring FITSIO,$(OPT))) LIBS += -L$(FITSIO_LIB) -lcfitsio Loading Loading @@ -100,3 +96,5 @@ clean: rm *.o rm w-stacking.c rm phase_correction.c # module load spectrum_mpi/10.3.1--binary && module load fftw && module load cuda/11.3 && module load profile/candidate && module load hpc-sdk/2022--binary && LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/m100_work/IscrC_CD-DLS/cfitsio-3.49 \ # && LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/cineca/prod/opt/compilers/hpc-sdk/2022/binary/Linux_ppc64le/2022/math_libs/11.8/lib64 && export LD_LIBRARY_PATH
phase_correction.cu +1 −1 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in // WARNING: nbucket MUST be chosen such that xaxis*yaxis*num_w_planes is a multiple of nbucket int nbucket = 1; int Nth = NTHREADS; long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth/nbucket) + 1; long Nbl = (long)((num_w_planes*xaxis*yaxis)/nbucket/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl); Loading
w-stacking-fftw.c +13 −21 Original line number Diff line number Diff line Loading @@ -106,8 +106,8 @@ int main(int argc, char * argv[]) double resolution; // Mesh related parameters: global size int grid_size_x = 4096; int grid_size_y = 4096; int grid_size_x = 8192; int grid_size_y = 8192; // Split Mesh size (auto-calculated) int local_grid_size_x; int local_grid_size_y; Loading @@ -115,7 +115,7 @@ int main(int argc, char * argv[]) int yaxis; // Number of planes in the w direction int num_w_planes = 1; int num_w_planes = 2; // Size of the convoutional kernel support int w_support = 7; Loading @@ -131,23 +131,13 @@ int main(int argc, char * argv[]) // Initialize FITS image parameters <<<<<<< HEAD #ifdef FITSIO fitsfile *fptreal; fitsfile *fptrimg; int status; char testfitsreal[FILENAMELENGTH] = "parallel_np8_real.fits"; char testfitsimag[FILENAMELENGTH] = "parallel_np8_img.fits"; ======= #ifdef FITSIO fitsfile *fptreal; fitsfile *fptrimg; int status; char testfitsreal[FILENAMELENGTH] = "parallel_np2_real.fits"; char testfitsimag[FILENAMELENGTH] = "parallel_np2_img.fits"; #endif >>>>>>> main char testfitsreal[FILENAMELENGTH] = "parallel_real.fits"; char testfitsimag[FILENAMELENGTH] = "parallel_img.fits"; long naxis = 2; long naxes[2] = { grid_size_x, grid_size_y }; Loading Loading @@ -241,7 +231,9 @@ if(rank == 0){ // INPUT FILES (only the first ndatasets entries are used) int ndatasets = 1; strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/SKA3/ZW2_IFRQ_0-1-5.binMS/"); // strcpy(datapath_multi[0],"/m100_scratch/userexternal/cgheller/SKA3/ZW2_IFRQ_0-1-5.binMS/"); strcpy(datapath_multi[0],"/m100_scratch/userexternal/ederubei/L798046_SB244_uv.uncorr_130B27932t_146MHz.pre-cal.binMS/"); strcpy(datapath,datapath_multi[0]); // Read metadata Loading @@ -266,7 +258,7 @@ if(rank == 0){ // WATCH THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! int nsub = 1000; //int nsub = 10; if(rank == 0)printf("Subtracting last %d measurements\n",nsub); printf("Subtracting last %d measurements\n",nsub); Nmeasures = Nmeasures-nsub; Nvis = Nmeasures*freq_per_chan*polarisations; Loading Loading @@ -504,7 +496,7 @@ if(rank == 0){ for (int ifiles=0; ifiles<ndatasets; ifiles++) { strcpy(filename,datapath_multi[ifiles]); if(rank == 0)printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets); printf("Processing %s, %d of %d\n",filename,ifiles+1,ndatasets); // Read metadata strcpy(filename,datapath); Loading @@ -528,7 +520,7 @@ if(rank == 0){ resolution = 1.0/MAX(abs(uvmin),abs(uvmax)); // calculate the resolution in arcsec double resolution_asec = (3600.0*180.0)/MAX(abs(uvmin),abs(uvmax))/PI; if(rank == 0)printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec); strcpy(filename,datapath); strcat(filename,weightsfile); Loading
w-stacking.cu +9 −5 Original line number Diff line number Diff line Loading @@ -59,7 +59,7 @@ void makeKaiserBesselKernel(double * kernel, double overSamplingFactor, int withSinc) { int n = increaseprecision*KernelLen, mid = n / 2; double * sincKernel = malloc((mid + 1)*sizeof(*sincKernel)); double * sincKernel = (double*)malloc((mid + 1) * sizeof(*sincKernel)); const double filterRatio = 1.0 / overSamplingFactor; sincKernel[0] = filterRatio; for (int i = 1; i != mid + 1; i++) { Loading Loading @@ -209,7 +209,7 @@ void wstack( double std = 1.0; double std22 = 1.0/(2.0*std*std); double norm = std22/PI; double * convkernel = malloc(increaseprecision*w_support*sizeof(*convkernel)); double * convkernel = (double*)malloc(increaseprecision*w_support*sizeof(*convkernel)); double overSamplingFactor = 1.0; int withSinc = 0; double alpha = 8.6; Loading Loading @@ -249,7 +249,10 @@ void wstack( mmm=cudaMalloc(&vis_img_g,Nvis*sizeof(float)); mmm=cudaMalloc(&weight_g,(Nvis/freq_per_chan)*sizeof(float)); mmm=cudaMalloc(&grid_g,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMalloc ERROR %d !!!\n", mmm);} mmm=cudaMemset(grid_g,0.0,2*num_w_planes*grid_size_x*grid_size_y*sizeof(double)); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemset ERROR %d !!!\n", mmm);} mmm=cudaMemcpy(uu_g, uu, num_points*sizeof(double), cudaMemcpyHostToDevice); mmm=cudaMemcpy(vv_g, vv, num_points*sizeof(double), cudaMemcpyHostToDevice); Loading @@ -257,6 +260,7 @@ void wstack( mmm=cudaMemcpy(vis_real_g, vis_real, Nvis*sizeof(float), cudaMemcpyHostToDevice); mmm=cudaMemcpy(vis_img_g, vis_img, Nvis*sizeof(float), cudaMemcpyHostToDevice); mmm=cudaMemcpy(weight_g, weight, (Nvis/freq_per_chan)*sizeof(float), cudaMemcpyHostToDevice); if (mmm != cudaSuccess) {printf("!!! w-stacking.cu cudaMemcpy ERROR %d !!!\n", mmm);} // Call main GPU Kernel convolve_g <<<Nbl,Nth>>> ( Loading