Commit 79b9e1d3 authored by Claudio Gheller's avatar Claudio Gheller
Browse files

parallel MPI implementation. Still buggy at the boundary between sectors

parent 15e3f465
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@ __global__ void convolve_g(
		// FIRST TERM IS CALCULATED, REMAINING TERMS ARE SKIPPED
	        // COMMENTED DOUBLE LOOP BELOW IS THE ORIGINAL GRIDDING ALGORITHM (FOR REFERENCE)
                vis_real[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer+1]*conv_weight;

		/*
                for (long ifreq=0; ifreq<freq_per_chan; ifreq++)
@@ -278,7 +278,7 @@ void degrid(
                // FIRST TERM IS CALCULATED, REMAINING TERMS ARE SKIPPED
                // COMMENTED DOUBLE LOOP BELOW IS THE ORIGINAL GRIDDING ALGORITHM (FOR REFERENCE)
                vis_real[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer+1]*conv_weight;

                /*
		// DAV: the following two loops are performend by each thread separately: no problems of race conditions
+2 −2
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@ __global__ void convolve_g(
		// FIRST TERM IS CALCULATED, REMAINING TERMS ARE SKIPPED
	        // COMMENTED DOUBLE LOOP BELOW IS THE ORIGINAL GRIDDING ALGORITHM (FOR REFERENCE)
                vis_real[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer+1]*conv_weight;

		/*
                for (long ifreq=0; ifreq<freq_per_chan; ifreq++)
@@ -278,7 +278,7 @@ void degrid(
                // FIRST TERM IS CALCULATED, REMAINING TERMS ARE SKIPPED
                // COMMENTED DOUBLE LOOP BELOW IS THE ORIGINAL GRIDDING ALGORITHM (FOR REFERENCE)
                vis_real[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer]*conv_weight;
                vis_img[ifine] += grid[iKer+1]*conv_weight;

                /*
		// DAV: the following two loops are performend by each thread separately: no problems of race conditions
+142 −44
Original line number Diff line number Diff line
@@ -86,7 +86,7 @@ int main(int argc, char * argv[])
        #ifdef FITSIO
	char imagename[FILENAMELENGTH] = "test.fits";
        #else
	char imagename[FILENAMELENGTH] = "image_name.bin";
	char imagename[FILENAMELENGTH] = "test.bin";
        #endif

	// Visibilities related variables
@@ -354,19 +354,19 @@ if(rank == 0){
        int nocomments;
        char *header;
        fits_hdr2str(fptr,0,NULL,0, &header,&nkeys,&status);
        printf("Number of keywords in the Header = %d\n",nkeys);
        //printf("Number of keywords in the Header = %d\n",nkeys);
        int header_aux = (int)(80*nkeys/2880);
        int header_size = (header_aux+1)*2880;
        printf("Header size (bytes)              = %d\n\n",header_size);
        //printf("Header size (bytes)              = %d\n\n",header_size);

	// get image size
	int naxis;
	long * naxes;
	fits_get_img_dim(fptr, &naxis,  &status);
	printf("number of axis: %d\n",naxis);
	//printf("number of axis: %d\n",naxis);
	naxes = (long *)malloc(sizeof(long)*naxis);
	fits_get_img_size(fptr, naxis, naxes, &status);
	printf("image size (from FITS file): %d\n",naxes[0]);
	//printf("image size (from FITS file): %d\n",naxes[0]);
	if (naxes[0] != xaxis){
		printf("Wrong Image Size : %d vs %d\n",naxes[0],xaxis);
		exit(2);
@@ -377,12 +377,13 @@ if(rank == 0){
	long * inc = (long *) malloc(sizeof(long)*naxis);
	int anynul;

        fpixel[1] = 1;
	lpixel[1] = xaxis;
        inc[1] = 1;
	fpixel[0] = rank*yaxis+1;
	lpixel[0] = (rank+1)*yaxis;
	// WARNING in FITS x and y axis are swapped: x_fits --> y_code, y_fits --> x_code
        fpixel[0] = 1;
	lpixel[0] = xaxis;
        inc[0] = 1;
	fpixel[1] = rank*yaxis+1;
	lpixel[1] = (rank+1)*yaxis;
        inc[1] = 1;
	fits_read_subset(fptr, TDOUBLE, fpixel, lpixel, inc, NULL, image_real, &anynul, &status);
	//fits_read_img(fptr, TDOUBLE, 1, xaxis*yaxis, NULL, image_real, &anynul, &status);
	fits_close_file(fptr, &status);
@@ -398,9 +399,31 @@ if(rank == 0){
	// image read
	
        #ifdef WRITE_DATA
        pFilereal = fopen ("revtest.bin","wb");
	// each processor writes in a different file
	/*
	strcpy(filename,"./");
        strcat(filename,"revtest");
	char buffer[1];
	sprintf(buffer,"%d",rank);
        strcat(filename,buffer);
        strcat(filename,".bin");
	pFilereal = fopen (filename,"wb");
	fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal);
        fclose(pFilereal);
	*/
	// each processor writes in the same file	
	for (int irank=0; irank<size; irank++)
	{
            MPI_Barrier(MPI_COMM_WORLD);
	    if (irank == rank)
            {
               pFilereal = fopen ("revtest.bin","ab");
	       long global_index = rank*(xaxis*yaxis)*sizeof(double);
	       fseek(pFilereal, global_index, SEEK_SET);
	       fwrite(image_real, xaxis*yaxis, sizeof(double), pFilereal);
	       fclose(pFilereal);
	    }
	}
        #endif
	
        if(rank == 0)printf("FFT TRANSFORMING (from Real to Complex Fourier space)\n");
@@ -466,6 +489,28 @@ if(rank == 0){
               }
            }
        }
	// This is only for the moment
	fftwindex = 0;
        for (int iw=0; iw<num_w_planes; iw++)
        {
            for (int iv=0; iv<yaxis; iv++)
            {
               for (int iu=0; iu<xaxis; iu++)
               {
                   fftwindex2D = iu + iv*xaxis;
                   fftwindex2 = 2*(fftwindex);
                   //fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                   grid[fftwindex2] = image_real[fftwindex];
                   grid[fftwindex2+1] = image_real[fftwindex];
                   #ifdef WRITE_DATA
                   grid_real[fftwindex] = grid[fftwindex2];
                   grid_img[fftwindex] = grid[fftwindex2];
                   #endif
                   fftwindex++;
               }
            }
        }

	fftw_free(fftwgrid);
	free(fftwimage);

@@ -519,7 +564,6 @@ if(rank == 0){
        #endif

#endif //WRITE_DATA
	//exit(3);

        if(rank == 0)printf("CREATING LINKED LISTS\n");

@@ -638,6 +682,8 @@ if(rank == 0){
	     uus[icount] = uu[ilocal];
	     vvs[icount] = vv[ilocal]-isector*shift;
	     wws[icount] = ww[ilocal];

             /* This comes from  the gridding version
	     for (long ipol=0; ipol<polarisations; ipol++)
             {
	          weightss[ip] = weights[ilocal*polarisations+ipol];
@@ -647,9 +693,9 @@ if(rank == 0){
             {
	          visreals[inu] = visreal[ilocal*polarisations*freq_per_chan+ifreq];
	          visimgs[inu] = visimg[ilocal*polarisations*freq_per_chan+ifreq];
	          //if(visimgs[inu]>1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*polarisations*freq_per_chan+ifreq,Nvis);
	          inu++;
	     }
	     */
	     icount++;
	     current = current->next;
        }
@@ -660,31 +706,6 @@ if(rank == 0){
	compose_time1 += (finishk.tv_sec - begink.tv_sec);
	compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;

        //#ifndef USE_MPI
	double uumin = 1e20;
	double vvmin = 1e20;
	double uumax = -1e20;
	double vvmax = -1e20;
	pFile = fopen (outfile1,"w");

        for (long ipart=0; ipart<Nsec; ipart++)
        {
	       uumin = MIN(uumin,uus[ipart]);
	       uumax = MAX(uumax,uus[ipart]);
	       vvmin = MIN(vvmin,vvs[ipart]);
	       vvmax = MAX(vvmax,vvs[ipart]);


               if(ipart%10 == 0)fprintf (pFile, "%ld %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,wws[ipart]);
        }

	printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax);
	fclose(pFile);
	printf("============> 3\n");
        //#endif




        // Make degriddig on the visibilities
        #ifdef VERBOSE
@@ -707,9 +728,20 @@ if(rank == 0){
#endif
*/

        
        // Distribute the current sector mesh data across all processors
	
        double * gridss;

	#ifdef USE_MPI
        gridss = (double*) calloc(size_of_grid,sizeof(double));
	for (long i=0; i<size_of_grid; i++)gridss[i] = grid[i];
	MPI_Bcast(gridss,size_of_grid,MPI_DOUBLE,isector_count,MPI_COMM_WORLD);
	#else
	gridss = grid;
        #endif

	// Perform the degridding
        degrid(num_w_planes,
               Nsec,
               freq_per_chan,
@@ -728,6 +760,53 @@ if(rank == 0){
               gridss,
               num_threads);

	printf("Degridding for rank %d on sector %d completed\n\n",rank,isector);
	current = sectorhead[isector];
	long inutarget = 0;
	while (current->index != -1)
	{
	      long ilocal = current->index;

	      // in case of error (9) check available memory
	      for (long ifreq=0; ifreq<polarisations*freq_per_chan; ifreq++)
	      {
                  visreal[ilocal*polarisations*freq_per_chan+ifreq] += visreals[inutarget];
                  visimg[ilocal*polarisations*freq_per_chan+ifreq] += visimgs[inutarget];
	          inutarget++;
	      }

	      current = current->next;
	}
	
	free(gridss);

        double uumin = 1e20;
        double vvmin = 1e20;
        double uumax = -1e20;
        double vvmax = -1e20;
        //pFile = fopen (outfile1,"w");

        for (long ipart=0; ipart<Nsec; ipart++)
        {
               uumin = MIN(uumin,uus[ipart]);
               uumax = MAX(uumax,uus[ipart]);
               vvmin = MIN(vvmin,vvs[ipart]);
               vvmax = MAX(vvmax,vvs[ipart]);
               /*
               if(ipart%10 == 0){
		       long icolor = ipart*freq_per_chan*polarisations;
		       fprintf (pFile, "%ld %f %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,visreals[icolor],visimgs[icolor]);
	       }
	       */
        }

        printf("%d UU, VV, min, max = %f %f %f %f\n", rank, uumin, uumax, vvmin, vvmax);
        //fclose(pFile);
        //#endif




#ifdef STOPHERE
/*
int z =0 ;
@@ -790,6 +869,25 @@ int z =0 ;
	free(visimgs);
        // End of loop over sectors
        }
        #ifdef WRITE_DATA
        // each processor writes in a different file
        strcpy(filename,"./");
        strcat(filename,"visibilities");
        char buffer[1];
        sprintf(buffer,"%d",rank);
        strcat(filename,buffer);
        strcat(filename,".txt");
        pFile = fopen (filename,"a");
        for (long ipart=0; ipart<Nmeasures; ipart++)
        {
        if(ipart%30 == 0){
          long icolor = ipart*freq_per_chan*polarisations;
          fprintf (pFile, "%ld %f %f %f %f\n",isector,uu[ipart],vv[ipart],visreal[icolor],visimg[icolor]);
        }
        }
        fclose(pFile);
        #endif

	// End of loop over input files
	}