Commit f1efc03a authored by Giovanni Lacopo
Browse files

cufftMP working with CUDA and OpenMP

parent ea94cf82
Loading
Loading
Loading
Loading
+101 −87
Original line number Diff line number Diff line
@@ -5,10 +5,11 @@
#include <cuda_runtime.h>
#include <complex.h>
#include "cuComplex.h"
#include "w-stacking.h"
#include "proto.h"
#include "errcodes.h"
#include <time.h>

#if defined(CUFFTMP) && !defined(USE_FFTW)
#if defined(CUFFTMP) && defined(USE_FFTW)

void cuda_fft(
	      int num_w_planes,
@@ -18,9 +19,22 @@ void cuda_fft(
	      int yaxis,
	      double * grid,
	      double * gridss,
	      int rank,
	      MPI_Comm comm)
{
#ifdef __CUDACC__

 #if !defined __CUDACC__
  int ndevices;
  cudaGetDeviceCount(&ndevices);
  cudaSetDevice(rank % ndevices);

  if ( rank == 0 ) {
    if (0 == ndevices) {
      
      shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ );
    }
  }
 #endif
  
  cudaError_t mmm;
  cufftResult_t status;
@@ -53,8 +67,8 @@ void cuda_fft(



	long fftwindex = 0;
	long fftwindex2D = 0;
  uint fftwindex = 0;
  uint fftwindex2D = 0;
  double norm = 1.0/(double)(grid_size_x*grid_size_y);


@@ -64,7 +78,7 @@ void cuda_fft(

  for (int iw=0; iw<num_w_planes; iw++)
    {
                printf("select the %d w-plane to transform\n", iw);
      //printf("select the %d w-plane to transform\n", iw);
      for (int iv=0; iv<yaxis; iv++)
	{
	  for (int iu=0; iu<xaxis; iu++)
@@ -143,6 +157,6 @@ void cuda_fft(
  cudaStreamDestroy(stream);
  cudaDeviceSynchronize();

#endif // __CUDACC__
}
#endif