Commit a7e1860b authored by Emanuele De Rubeis's avatar Emanuele De Rubeis
Browse files

Implementation for cuFFT

parent b48befbc
Loading
Loading
Loading
Loading

cuda_fft.cu

0 → 100644
+122 −0
Original line number Original line Diff line number Diff line
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <cufft.h>
#include <cufftw.h>
#include "w-stacking.h"
#include <cuda_runtime.h>
#include <complex.h>
#include "cuComplex.h"

/*
#ifdef __CUDACC__
double __device__
#else
double
#endif
*/


//__global__ void write_grid






void cuda_fft(
	int num_w_planes,
	int grid_size_x,
	int grid_size_y,
	int xaxis,
	int yaxis,
	double * grid,
	double * gridss)
{

#ifdef __CUDACC__

	cufftDoubleComplex *fftwgrid;
	cufftDoubleComplex *fftwgrid_g;
	cudaError_t mmm;
	cufftResult_t status;



	// Define the CUDA set up
	int Nth = NTHREADS;
	long Nbl = (long)(num_w_planes*xaxis*yaxis/Nth) + 1;
	if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
	//long Nvis = num_points*freq_per_chan*polarizations;
	///printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);
	fftwgrid =(cufftDoubleComplex*) malloc(sizeof(cufftDoubleComplex)*2*grid_size_x*grid_size_y);


	mmm=cudaMalloc(&fftwgrid_g, sizeof(cufftDoubleComplex)*2*grid_size_x*grid_size_y);
	if (mmm != cudaSuccess) {printf("cudaMalloc error %d\n",mmm);}
	//mmm=cudaMemset(fftwgrid_g, 0.0, 2*grid_size_x*grid_size_y*sizeof(cufftDoubleComplex));
	printf("malloc fftwgrid_g\n");


        cufftHandle plan;
	cufftCreate(&plan);
        cufftPlan1d(&plan, grid_size_x*grid_size_y, CUFFT_Z2Z, 1);
	printf("plan creation\n");
        double norm = 1.0/(double)(grid_size_x*grid_size_y);
        long fftwindex = 0;
        long fftwindex2D = 0;

        for (int iw=0; iw<num_w_planes; iw++)
        {
            printf("select the w-plane to transform\n");
            for (int iv=0; iv<yaxis; iv++)
            {
               for (int iu=0; iu<xaxis; iu++)
               {
                   fftwindex2D = iu + iv*xaxis;
                   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
		   fftwgrid[fftwindex2D].x = grid[fftwindex];
		   fftwgrid[fftwindex2D].y = grid[fftwindex+1];
               }
            }

	    //printf("fftwgrid[1260137].x pre cuFFT %f\n", fftwgrid[1260137].x);


	    mmm=cudaMemcpy(fftwgrid_g, fftwgrid, sizeof(cufftDoubleComplex)*2*grid_size_x*grid_size_y, cudaMemcpyHostToDevice);
            if (mmm != cudaSuccess) {printf("cudaMemcpy error %d\n",mmm);}
	    printf("memcpy fftwgrid to fftwgrid_g\n");


            printf("do the transform for each w-plane\n");

            status=cufftExecZ2Z(plan, fftwgrid_g, fftwgrid_g, CUFFT_INVERSE);
	    if (status != CUFFT_SUCCESS) {printf("cufftexec error %d\n",status);}
	    cudaDeviceSynchronize();


            mmm=cudaMemcpy(fftwgrid, fftwgrid_g, sizeof(cufftDoubleComplex)*2*grid_size_x*grid_size_y, cudaMemcpyDeviceToHost);
	    if (mmm != cudaSuccess) {printf("cudaMemcpy error %d\n",mmm);}
	    printf("memcpy fftwgrid_g to fftwgrid\n");
	    //printf("fftwgrid[1260137].x post cuFFT %f\n", fftwgrid[1260137].x);


            // save the transformed w-plane
            for (int iv=0; iv<yaxis; iv++)
            {
               for (int iu=0; iu<xaxis; iu++)
               {
                   fftwindex2D = iu + iv*xaxis;
                   fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                   gridss[fftwindex] = norm*fftwgrid[fftwindex2D].x;
                   gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].y;
               }
            }

        }

        cufftDestroy(plan);

	mmm = cudaFree(fftwgrid_g);
#endif
}