Commit 1cde6e96 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

...

parent 00a7c7f4
Loading
Loading
Loading
Loading

cuda_fft.cpp

deleted100644 → 0
+0 −162
Original line number Diff line number Diff line
#include <stdlib.h>
#include <stdio.h>
#include <cufftMp.h>
#include <mpi.h>
#include <cuda_runtime.h>
#include <complex.h>
#include "cuComplex.h"
#include "proto.h"
#include "errcodes.h"
#include <time.h>

#if defined(CUFFTMP) && defined(USE_FFTW)

void cuda_fft(
	      int num_w_planes,
	      int grid_size_x,
	      int grid_size_y,
	      int xaxis,
	      int yaxis,
	      double * grid,
	      double * gridss,
	      int rank,
	      MPI_Comm comm)
{

 #if !defined __CUDACC__
  int ndevices;
  cudaGetDeviceCount(&ndevices);
  cudaSetDevice(rank % ndevices);

  if ( rank == 0 ) {
    if (0 == ndevices) {
      
      shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ );
    }
  }
 #endif
  
  cudaError_t mmm;
  cufftResult_t status;

  cufftDoubleComplex *fftwgrid;
  fftwgrid = (cufftDoubleComplex*) malloc(sizeof(cufftDoubleComplex)*2*num_w_planes*yaxis*grid_size_x);



  // Plan creation

  cufftHandle plan;
  status = cufftCreate(&plan);
  if (status != CUFFT_SUCCESS) {printf("!!! cufftCreate ERROR %d !!!\n", status);}

  cudaStream_t stream{};
  cudaStreamCreate(&stream);


  status = cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm);
  if (status != CUFFT_SUCCESS) {printf("!!! cufftMpAttachComm ERROR %d !!!\n", status);}

  status = cufftSetStream(plan, stream);
  if (status != CUFFT_SUCCESS) {printf("!!! cufftSetStream ERROR %d !!!\n", status);}

  size_t workspace;
  status = cufftMakePlan2d(plan, grid_size_x, grid_size_y, CUFFT_Z2Z, &workspace);
  if (status != CUFFT_SUCCESS) {printf("!!! cufftMakePlan2d ERROR %d !!!\n", status);}
  cudaDeviceSynchronize();



  uint fftwindex = 0;
  uint fftwindex2D = 0;
  double norm = 1.0/(double)(grid_size_x*grid_size_y);




  // Grid composition

  for (int iw=0; iw<num_w_planes; iw++)
    {
      //printf("select the %d w-plane to transform\n", iw);
      for (int iv=0; iv<yaxis; iv++)
	{
	  for (int iu=0; iu<xaxis; iu++)
	    {
	      fftwindex2D = iu + iv*xaxis;
	      fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
	      fftwgrid[fftwindex2D].x = grid[fftwindex];
	      fftwgrid[fftwindex2D].y = grid[fftwindex+1];
	    }
	}


      cudaLibXtDesc *fftwgrid_g;
      cudaLibXtDesc *fftwgrid_g2;


      status = cufftXtMalloc(plan, &fftwgrid_g, CUFFT_XT_FORMAT_INPLACE);
      if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMalloc ERROR %d !!!\n", status);}

      status = cufftXtMalloc(plan, &fftwgrid_g2, CUFFT_XT_FORMAT_INPLACE);
      if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMalloc 2 ERROR %d !!!\n", status);}
      cudaDeviceSynchronize();

      mmm = cudaStreamSynchronize(stream);
      if (mmm != cudaSuccess) {printf("!!! cudaStreamSynchronize ERROR %d !!!\n", mmm);}

      status = cufftXtMemcpy(plan, fftwgrid_g, fftwgrid, CUFFT_COPY_HOST_TO_DEVICE);
      if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMemcpy htd fftwgrid_g ERROR %d !!!\n", status);}

      cudaDeviceSynchronize();

      status = cufftXtExecDescriptor(plan, fftwgrid_g, fftwgrid_g, CUFFT_INVERSE);
      if (status != CUFFT_SUCCESS) {printf("!!! cufftXtExecDescriptor ERROR %d !!!\n", status);}


      mmm = cudaStreamSynchronize(stream);
      if (mmm != cudaSuccess) {printf("!!! cudaStreamSynchronize 2 ERROR %d !!!\n", mmm);}

      cudaDeviceSynchronize();


      status = cufftXtMemcpy(plan, fftwgrid_g2, fftwgrid_g, CUFFT_COPY_DEVICE_TO_DEVICE);
      if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMemcpy dtd fftwgrid ERROR %d !!!\n", status);}


      mmm = cudaStreamSynchronize(stream);
      if (mmm != cudaSuccess) {printf("!!! cudaStreamSynchronize 2 ERROR %d !!!\n", mmm);}

      cudaDeviceSynchronize();

      status = cufftXtMemcpy(plan, fftwgrid, fftwgrid_g2, CUFFT_COPY_DEVICE_TO_HOST);
      if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMemcpy dth fftwgrid ERROR %d !!!\n", status);}


      cufftXtFree(fftwgrid_g);
      cufftXtFree(fftwgrid_g2);

      for (int iv=0; iv<yaxis; iv++)
	{
	  for (int iu=0; iu<xaxis; iu++)
	    {
	      fftwindex2D = iu + iv*xaxis;
	      fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
	      gridss[fftwindex] = norm*fftwgrid[fftwindex2D].x;
	      gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].y;
	    }
	}
    }



  status = cufftDestroy(plan);
  if (status != CUFFT_SUCCESS) {printf("!!! cufftDestroy fftwgrid ERROR %d !!!\n", status);}


  cudaStreamDestroy(stream);
  cudaDeviceSynchronize();

}
#endif