Loading cuda_fft.cu +101 −87 Original line number Diff line number Diff line Loading @@ -5,10 +5,11 @@ #include <cuda_runtime.h> #include <complex.h> #include "cuComplex.h" #include "w-stacking.h" #include "proto.h" #include "errcodes.h" #include <time.h> #if defined(CUFFTMP) && !defined(USE_FFTW) #if defined(CUFFTMP) && defined(USE_FFTW) void cuda_fft( int num_w_planes, Loading @@ -18,9 +19,22 @@ void cuda_fft( int yaxis, double * grid, double * gridss, int rank, MPI_Comm comm) { #ifdef __CUDACC__ #if !defined __CUDACC__ int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); if ( rank == 0 ) { if (0 == ndevices) { shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ ); } } #endif cudaError_t mmm; cufftResult_t status; Loading Loading @@ -53,8 +67,8 @@ void cuda_fft( long fftwindex = 0; long fftwindex2D = 0; uint fftwindex = 0; uint fftwindex2D = 0; double norm = 1.0/(double)(grid_size_x*grid_size_y); Loading @@ -64,7 +78,7 @@ void cuda_fft( for (int iw=0; iw<num_w_planes; iw++) { printf("select the %d w-plane to transform\n", iw); //printf("select the %d w-plane to transform\n", iw); for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) Loading Loading @@ -143,6 +157,6 @@ void cuda_fft( cudaStreamDestroy(stream); cudaDeviceSynchronize(); #endif // __CUDACC__ } #endif Loading
cuda_fft.cu +101 −87 Original line number Diff line number Diff line Loading @@ -5,10 +5,11 @@ #include <cuda_runtime.h> #include <complex.h> #include "cuComplex.h" #include "w-stacking.h" #include "proto.h" #include "errcodes.h" #include <time.h> #if defined(CUFFTMP) && !defined(USE_FFTW) #if defined(CUFFTMP) && defined(USE_FFTW) void cuda_fft( int num_w_planes, Loading @@ -18,9 +19,22 @@ void cuda_fft( int yaxis, double * grid, double * gridss, int rank, MPI_Comm comm) { #ifdef __CUDACC__ #if !defined __CUDACC__ int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); if ( rank == 0 ) { if (0 == ndevices) { shutdown_wstacking(NO_ACCELERATORS_FOUND, "No accelerators found", __FILE__, __LINE__ ); } } #endif cudaError_t mmm; cufftResult_t status; Loading Loading @@ -53,8 +67,8 @@ void cuda_fft( long fftwindex = 0; long fftwindex2D = 0; uint fftwindex = 0; uint fftwindex2D = 0; double norm = 1.0/(double)(grid_size_x*grid_size_y); Loading @@ -64,7 +78,7 @@ void cuda_fft( for (int iw=0; iw<num_w_planes; iw++) { printf("select the %d w-plane to transform\n", iw); //printf("select the %d w-plane to transform\n", iw); for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) Loading Loading @@ -143,6 +157,6 @@ void cuda_fft( cudaStreamDestroy(stream); cudaDeviceSynchronize(); #endif // __CUDACC__ } #endif