Commit f7773175 authored by Emanuele De Rubeis's avatar Emanuele De Rubeis
Browse files

Useless 3D FFT test removed

parent 76ca4e00
Loading
Loading
Loading
Loading

fourier_transform_new.cpp

deleted100755 → 0
+0 −315
Original line number Diff line number Diff line
#include <stdio.h>
#include "allvars.h"
#include "proto.h"
#include <vector>
#include <complex>
#include <iostream>

#ifdef CUFFTMP
#include <cufftMp.h>
#include <cuda_runtime.h>
#endif

void fftw_data(){

    #ifdef USE_FFTW
        // FFT transform the data (using distributed FFTW)
        if(rank == 0)printf("PERFORMING FFT\n");
        clock_gettime(CLOCK_MONOTONIC, &begin);
        start = clock();

       #ifndef CUFFTMP
        fftw_plan plan;
        fftw_complex *fftwgrid;
        ptrdiff_t alloc_local, local_n0, local_0_start;
        double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);

        // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2])
        // x is the direction of contiguous data and maps to the second parameter
        // y is the parallelized direction and corresponds to the first parameter (--> n0)
        // and perform the FFT per w plane
        alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start);
        fftwgrid = fftw_alloc_complex(alloc_local);
        plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);

        long fftwindex = 0;
        long fftwindex2D = 0;
        for (int iw=0; iw<param.num_w_planes; iw++)
        {
            //printf("FFTing plan %d\n",iw);
            //select the w-plane to transform
            for (int iv=0; iv<yaxis; iv++)
            {
               for (int iu=0; iu<xaxis; iu++)
               {
                   fftwindex2D = iu + iv*xaxis;
                   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                   fftwgrid[fftwindex2D][0] = grid[fftwindex];
                   fftwgrid[fftwindex2D][1] = grid[fftwindex+1];
               }
            }

	    // do the transform for each w-plane        
            fftw_execute(plan);

            // save the transformed w-plane
            for (int iv=0; iv<yaxis; iv++)
            {
               for (int iu=0; iu<xaxis; iu++)
               {
		   fftwindex2D = iu + iv*xaxis;
                   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                   gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0];
                   gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1];
               }
            }

        }

        fftw_destroy_plan(plan);
        fftw_free(fftwgrid);

	//Implementation of cufftMp
       #else

	//Select the default device per each MPI task
	int ndevices;
	cudaGetDeviceCount(&ndevices);
	cudaSetDevice(rank % ndevices);
	//long my_start = rank*( param.grid_size_y/size );
	//int my_ystart = rank*yaxis;
	//int my_yend   = (rank+1)*yaxis;
	double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);


	//Choose the communicator and allocate the vector
	MPI_Comm comm = MPI_COMM_WORLD;
	size_t Ny   = param.grid_size_y;
	size_t Nx   = param.grid_size_x;
	std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx );
	
	cufftHandle plan = 0;
	cudaStream_t stream = nullptr;
	cudaStreamCreate(&stream);

	//Create the plan for the FFT
	cufftCreate(&plan);
		
	size_t worksize;
	//Domain decomposition along the y-axis
	cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm);

	cufftSetStream(plan, stream);
	cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize);

	long fftwindex = 0;
	long fftwindex2D = 0;

	for (int iw=0; iw<param.num_w_planes; iw++)
	  {
	    //printf("FFTing plan %d\n",iw);
	    //select the w-plane to transform

	    for (int iv=0; iv<yaxis; iv++)
	      {
		for (int iu=0; iu<xaxis; iu++)
		  {
		    fftwindex2D = iu + iv*xaxis;
		    fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);

		    fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]};

		  }
	      }
	      
	    // do the transform for each w-plane
	    //printf("Task %d: ffting the %d plane\n", rank, iw);

	    cudaLibXtDesc *desc, *desc_mean;
	    cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE);
	    cufftXtMalloc(plan, &desc_mean, CUFFT_XT_FORMAT_INPLACE);

	    cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE);
	      
	    cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE);

	    cudaStreamSynchronize(stream);
	    cufftXtMemcpy (plan, desc_mean, desc, CUFFT_COPY_DEVICE_TO_DEVICE);
	    cufftXtMemcpy(plan, fftwgrid.data(), desc_mean, CUFFT_COPY_DEVICE_TO_HOST);

	    cufftXtFree(desc);
	    cufftXtFree(desc_mean);

	    for (int iv=0; iv<yaxis; iv++)
	      {
		for (int iu=0; iu<xaxis; iu++)
		  {
		    fftwindex2D = iu + iv*xaxis;
		    fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
		    gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real();
		    gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag();
		  }
	      }
	  }
	    
	cufftDestroy(plan);
	
       #endif //close ifndef ACCOMP
	
	
       #ifdef ONE_SIDE
        MPI_Win_fence(0,slabwin);
        MPI_Barrier(MPI_COMM_WORLD);
       #else
	MPI_Barrier(MPI_COMM_WORLD);
       #endif

        end = clock();
        clock_gettime(CLOCK_MONOTONIC, &finish);
        timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC;
        timing.fftw_time1 = (finish.tv_sec - begin.tv_sec);
        timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
        clock_gettime(CLOCK_MONOTONIC, &begin);

       #endif
	
}

void write_fftw_data(){

   #ifdef USE_FFTW
     #ifdef WRITE_DATA
        // Write results
        #ifdef USE_MPI
        MPI_Win writewin;
        MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin);
        MPI_Win_fence(0,writewin);
        #endif
        if (rank == 0)
        {
          printf("WRITING FFT TRANSFORMED DATA\n");
          file.pFilereal = fopen (out.fftfile2,"wb");
          file.pFileimg = fopen (out.fftfile3,"wb");
          #ifdef USE_MPI
          for (int isector=0; isector<nsectors; isector++)
          {
              MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin);
              MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin);
              MPI_Win_unlock(isector,writewin);
              for (long i=0; i<size_of_grid/2; i++)
              {
                      gridss_real[i] = gridss_w[2*i];
                      gridss_img[i] = gridss_w[2*i+1];
              }
              if (param.num_w_planes > 1)
              {
                for (int iw=0; iw<param.num_w_planes; iw++)
                for (int iv=0; iv<yaxis; iv++)
                for (int iu=0; iu<xaxis; iu++)
                {
                          long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
                          fseek(file.pFilereal, global_index, SEEK_SET);
                          fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal);
                }
                for (int iw=0; iw<param.num_w_planes; iw++)
                for (int iv=0; iv<yaxis; iv++)
                for (int iu=0; iu<xaxis; iu++)
                {
                          long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
                          fseek(file.pFileimg, global_index, SEEK_SET);
                          fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg);
                }
               } 
               else 
               {
                          fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal);
                          fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg);
               }

           }
           #else
           /*
               for (int iw=0; iw<param.num_w_planes; iw++)
                 for (int iv=0; iv<grid_size_y; iv++)
                   for (int iu=0; iu<grid_size_x; iu++)
                   {
                          int isector = 0;
                          long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
                          double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
                          fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
                   }
           */
           #endif

           fclose(file.pFilereal);
           fclose(file.pFileimg);
        }
        #ifdef USE_MPI
               MPI_Win_fence(0,writewin);
               MPI_Win_free(&writewin);
               MPI_Barrier(MPI_COMM_WORLD);
        #endif
     #endif //WRITE_DATA


     // Phase correction  

     clock_gettime(CLOCK_MONOTONIC, &begin);
     start = clock();
     if(rank == 0)printf("PHASE CORRECTION\n");
     double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));
     double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));

     phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank);

     end = clock();
     clock_gettime(CLOCK_MONOTONIC, &finish);
     timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC;
     timing.phase_time1 = (finish.tv_sec - begin.tv_sec);
     timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

     #ifdef WRITE_IMAGE

        if(rank == 0)
        {
            file.pFilereal = fopen (out.fftfile2,"wb");
            file.pFileimg = fopen (out.fftfile3,"wb");
            fclose(file.pFilereal);
            fclose(file.pFileimg);
        }
        #ifdef USE_MPI
            MPI_Barrier(MPI_COMM_WORLD);
        #endif
        if(rank == 0)printf("WRITING IMAGE\n");
        for (int isector=0; isector<size; isector++)
        {
            #ifdef USE_MPI
                MPI_Barrier(MPI_COMM_WORLD);
            #endif
            if(isector == rank)
            {
               printf("%d writing\n",isector);
               file.pFilereal = fopen (out.fftfile2,"ab");
               file.pFileimg = fopen (out.fftfile3,"ab");

               long global_index = isector*(xaxis*yaxis)*sizeof(double);

               fseek(file.pFilereal, global_index, SEEK_SET);
               fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal);
               fseek(file.pFileimg, global_index, SEEK_SET);
               fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg);

               fclose(file.pFilereal);
               fclose(file.pFileimg);
            }
        }
        #ifdef USE_MPI
           MPI_Barrier(MPI_COMM_WORLD);
        #endif

     #endif //WRITE_IMAGE

  #endif  //FFTW
}

fourier_transform_test.cpp

deleted100755 → 0
+0 −313
Original line number Diff line number Diff line
#include <stdio.h>
#include "allvars.h"
#include "proto.h"
#include <vector>
#include <complex>
#include <iostream>

#ifdef CUFFTMP
#include <cufftMp.h>
#include <cuda_runtime.h>
#endif

void fftw_data(){

    #ifdef USE_FFTW
        // FFT transform the data (using distributed FFTW)
        if(rank == 0)printf("PERFORMING FFT\n");
        clock_gettime(CLOCK_MONOTONIC, &begin);
        start = clock();

       #ifndef CUFFTMP
        fftw_plan plan;
        fftw_complex *fftwgrid;
        ptrdiff_t alloc_local, local_n0, local_0_start;
        double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);

        // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2])
        // x is the direction of contiguous data and maps to the second parameter
        // y is the parallelized direction and corresponds to the first parameter (--> n0)
        // and perform the FFT per w plane
        alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start);
        fftwgrid = fftw_alloc_complex(alloc_local);
        plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);

        long fftwindex = 0;
        long fftwindex2D = 0;
        for (int iw=0; iw<param.num_w_planes; iw++)
        {
            //printf("FFTing plan %d\n",iw);
            //select the w-plane to transform
            for (int iv=0; iv<yaxis; iv++)
            {
               for (int iu=0; iu<xaxis; iu++)
               {
                   fftwindex2D = iu + iv*xaxis;
                   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                   fftwgrid[fftwindex2D][0] = grid[fftwindex];
                   fftwgrid[fftwindex2D][1] = grid[fftwindex+1];
               }
            }

	    // do the transform for each w-plane        
            fftw_execute(plan);

            // save the transformed w-plane
            for (int iv=0; iv<yaxis; iv++)
            {
               for (int iu=0; iu<xaxis; iu++)
               {
		   fftwindex2D = iu + iv*xaxis;
                   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                   gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0];
                   gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1];
               }
            }

        }

        fftw_destroy_plan(plan);
        fftw_free(fftwgrid);

	//Implementation of cufftMp
       #else

	//Select the default device per each MPI task
	int ndevices;
	cudaGetDeviceCount(&ndevices);
	cudaSetDevice(rank % ndevices);
	//long my_start = rank*( param.grid_size_y/size );
	//int my_ystart = rank*yaxis;
	//int my_yend   = (rank+1)*yaxis;
	double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y);


	//Choose the communicator and allocate the vector
	MPI_Comm comm = MPI_COMM_WORLD;
	size_t Ny   = param.grid_size_y;
	size_t Nx   = param.grid_size_x;
	std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx );
	
	cufftHandle plan = 0;
	cudaStream_t stream = nullptr;
	cudaStreamCreate(&stream);

	//Create the plan for the FFT
	cufftCreate(&plan);
		
	size_t worksize;
	//Domain decomposition along the y-axis
	cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm);

	cufftSetStream(plan, stream);
	cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize);

	long fftwindex = 0;
	long fftwindex2D = 0;

	for (int iw=0; iw<param.num_w_planes; iw++)
	  {
	    //printf("FFTing plan %d\n",iw);
	    //select the w-plane to transform

	    for (int iv=0; iv<yaxis; iv++)
	      {
		for (int iu=0; iu<xaxis; iu++)
		  {
		    fftwindex2D = iu + iv*xaxis;
		    fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);

		    fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]};

		  }
	      }
	      
	    // do the transform for each w-plane
	    //printf("Task %d: ffting the %d plane\n", rank, iw);

	    cudaLibXtDesc *desc;
	    cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE);
	    
	    cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE);
	      
	    cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE);

	    cudaStreamSynchronize(stream);
	    cufftXtMemcpy (plan, desc, desc, CUFFT_COPY_DEVICE_TO_DEVICE);
	    cufftXtMemcpy(plan, fftwgrid.data(), desc, CUFFT_COPY_DEVICE_TO_HOST);

	    cufftXtFree(desc);
	    
	    for (int iv=0; iv<yaxis; iv++)
	      {
		for (int iu=0; iu<xaxis; iu++)
		  {
		    fftwindex2D = iu + iv*xaxis;
		    fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
		    gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real();
		    gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag();
		  }
	      }
	  }
	    
	cufftDestroy(plan);
	
       #endif //close ifndef ACCOMP
	
	
       #ifdef ONE_SIDE
        MPI_Win_fence(0,slabwin);
        MPI_Barrier(MPI_COMM_WORLD);
       #else
	MPI_Barrier(MPI_COMM_WORLD);
       #endif

        end = clock();
        clock_gettime(CLOCK_MONOTONIC, &finish);
        timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC;
        timing.fftw_time1 = (finish.tv_sec - begin.tv_sec);
        timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;
        clock_gettime(CLOCK_MONOTONIC, &begin);

       #endif
	
}

void write_fftw_data(){

   #ifdef USE_FFTW
     #ifdef WRITE_DATA
        // Write results
        #ifdef USE_MPI
        MPI_Win writewin;
        MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin);
        MPI_Win_fence(0,writewin);
        #endif
        if (rank == 0)
        {
          printf("WRITING FFT TRANSFORMED DATA\n");
          file.pFilereal = fopen (out.fftfile2,"wb");
          file.pFileimg = fopen (out.fftfile3,"wb");
          #ifdef USE_MPI
          for (int isector=0; isector<nsectors; isector++)
          {
              MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin);
              MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin);
              MPI_Win_unlock(isector,writewin);
              for (long i=0; i<size_of_grid/2; i++)
              {
                      gridss_real[i] = gridss_w[2*i];
                      gridss_img[i] = gridss_w[2*i+1];
              }
              if (param.num_w_planes > 1)
              {
                for (int iw=0; iw<param.num_w_planes; iw++)
                for (int iv=0; iv<yaxis; iv++)
                for (int iu=0; iu<xaxis; iu++)
                {
                          long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
                          fseek(file.pFilereal, global_index, SEEK_SET);
                          fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal);
                }
                for (int iw=0; iw<param.num_w_planes; iw++)
                for (int iv=0; iv<yaxis; iv++)
                for (int iu=0; iu<xaxis; iu++)
                {
                          long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double);
                          long index = iu + iv*xaxis + iw*xaxis*yaxis;
                          fseek(file.pFileimg, global_index, SEEK_SET);
                          fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg);
                }
               } 
               else 
               {
                          fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal);
                          fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg);
               }

           }
           #else
           /*
               for (int iw=0; iw<param.num_w_planes; iw++)
                 for (int iv=0; iv<grid_size_y; iv++)
                   for (int iu=0; iu<grid_size_x; iu++)
                   {
                          int isector = 0;
                          long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
                          double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
                          fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
                   }
           */
           #endif

           fclose(file.pFilereal);
           fclose(file.pFileimg);
        }
        #ifdef USE_MPI
               MPI_Win_fence(0,writewin);
               MPI_Win_free(&writewin);
               MPI_Barrier(MPI_COMM_WORLD);
        #endif
     #endif //WRITE_DATA


     // Phase correction  

     clock_gettime(CLOCK_MONOTONIC, &begin);
     start = clock();
     if(rank == 0)printf("PHASE CORRECTION\n");
     double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double));
     double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double));

     phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank);

     end = clock();
     clock_gettime(CLOCK_MONOTONIC, &finish);
     timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC;
     timing.phase_time1 = (finish.tv_sec - begin.tv_sec);
     timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

     #ifdef WRITE_IMAGE

        if(rank == 0)
        {
            file.pFilereal = fopen (out.fftfile2,"wb");
            file.pFileimg = fopen (out.fftfile3,"wb");
            fclose(file.pFilereal);
            fclose(file.pFileimg);
        }
        #ifdef USE_MPI
            MPI_Barrier(MPI_COMM_WORLD);
        #endif
        if(rank == 0)printf("WRITING IMAGE\n");
        for (int isector=0; isector<size; isector++)
        {
            #ifdef USE_MPI
                MPI_Barrier(MPI_COMM_WORLD);
            #endif
            if(isector == rank)
            {
               printf("%d writing\n",isector);
               file.pFilereal = fopen (out.fftfile2,"ab");
               file.pFileimg = fopen (out.fftfile3,"ab");

               long global_index = isector*(xaxis*yaxis)*sizeof(double);

               fseek(file.pFilereal, global_index, SEEK_SET);
               fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal);
               fseek(file.pFileimg, global_index, SEEK_SET);
               fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg);

               fclose(file.pFilereal);
               fclose(file.pFileimg);
            }
        }
        #ifdef USE_MPI
           MPI_Barrier(MPI_COMM_WORLD);
        #endif

     #endif //WRITE_IMAGE

  #endif  //FFTW
}