Commit 84b76b6b authored by Emanuele De Rubeis's avatar Emanuele De Rubeis
Browse files

Delete cuda_cufftmp.cu

parent 4c4ffcb2
Loading
Loading
Loading
Loading

cuda_cufftmp.cu

deleted100644 → 0
+0 −194
Original line number Diff line number Diff line
#include <stdlib.h>
#include <stdio.h>
#include "/cineca/prod/opt/compilers/hpc-sdk/2023/binary/Linux_ppc64le/23.1/math_libs/11.8/include/cufftmp/cufftMp.h"
#include <mpi.h>
#include <cuda_runtime.h>
#include <complex.h>
#include "cuComplex.h"
#include "w-stacking.h"
#define FILENAMELENGTH 30



void cuda_fft(
	int num_w_planes,
	int grid_size_x,
	int grid_size_y,
	int xaxis,
	int yaxis,
	double * grid,
	double * gridss,
	MPI_Comm comm,
	int rank,
	int size)
{
#ifdef __CUDACC__

        FILE * pFiletestmp0;
        FILE * pFiletestmp1;

	char testcufftmp0[FILENAMELENGTH] = "testcufftmp0_postfft.txt";
        char testcufftmp1[FILENAMELENGTH] = "testcufftmp1_postfft.txt";

	pFiletestmp0 = fopen(testcufftmp0, "w");
	pFiletestmp1 = fopen(testcufftmp1, "w");

        cudaError_t mmm;
        cufftResult_t status;
/*
        clock_t startcu, endcu;
        double cufft_exec_time;
        struct timespec begincu, finishcu;
*/
	cufftDoubleComplex *fftwgrid;
	fftwgrid = (cufftDoubleComplex*) malloc(sizeof(cufftDoubleComplex)*2*num_w_planes*yaxis*grid_size_x);


	// Plan creation

	cufftHandle plan;
        status = cufftCreate(&plan);
        if (status != CUFFT_SUCCESS) {printf("!!! cufftCreate ERROR %d !!!\n", status);}

	cudaStream_t stream{};
	cudaStreamCreate(&stream);


	status = cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm);
	if (status != CUFFT_SUCCESS) {printf("!!! cufftMpAttachComm ERROR %d !!!\n", status);}

	status = cufftSetStream(plan, stream);
	if (status != CUFFT_SUCCESS) {printf("!!! cufftSetStream ERROR %d !!!\n", status);}

	size_t workspace;
	status = cufftMakePlan2d(plan, grid_size_x, grid_size_y, CUFFT_Z2Z, &workspace);
	if (status != CUFFT_SUCCESS) {printf("!!! cufftMakePlan2d ERROR %d !!!\n", status);}
	cudaDeviceSynchronize();


	// Descriptor creation

	cudaLibXtDesc *fftwgrid_g;
	cudaLibXtDesc *fftwgrid_g2;

	status = cufftXtMalloc(plan, &fftwgrid_g, CUFFT_XT_FORMAT_INPLACE);
	if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMalloc ERROR %d !!!\n", status);}
	cudaDeviceSynchronize();

        status = cufftXtMalloc(plan, &fftwgrid_g2, CUFFT_XT_FORMAT_INPLACE);
        if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMalloc 2 ERROR %d !!!\n", status);}
        cudaDeviceSynchronize();




	long fftwindex = 0;
	long fftwindex2D = 0;
	double norm = 1.0/(double)(grid_size_x*grid_size_y);




	// Grid composition

	for (int iw=0; iw<num_w_planes; iw++)
        {
                printf("select the %d w-plane to transform\n", iw);
                for (int iv=0; iv<yaxis; iv++)
                {
                        for (int iu=0; iu<xaxis; iu++)
                        {
	                        fftwindex2D = iu + iv*xaxis;
				fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                	        fftwgrid[fftwindex2D].x = grid[fftwindex];
                        	fftwgrid[fftwindex2D].y = grid[fftwindex+1];
                        }
                }


		cudaDeviceSynchronize();

                mmm = cudaStreamSynchronize(stream);
                if (mmm != cudaSuccess) {printf("!!! cudaStreamSynchronize ERROR %d !!!\n", mmm);}

		status = cufftXtMemcpy(plan, fftwgrid_g, fftwgrid, CUFFT_COPY_HOST_TO_DEVICE);
		if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMemcpy htd fftwgrid_g ERROR %d !!!\n", status);}

		cudaDeviceSynchronize();


/*
		clock_gettime(CLOCK_MONOTONIC, &begincu);
		startcu = clock();
*/
		status = cufftXtExecDescriptor(plan, fftwgrid_g, fftwgrid_g, CUFFT_INVERSE);
		if (status != CUFFT_SUCCESS) {printf("!!! cufftXtExecDescriptor ERROR %d !!!\n", status);}

/*
		endcu = clock();
		clock_gettime(CLOCK_MONOTONIC, &finishcu);
		cufft_exec_time = ((double) (endcu - startcu)) / CLOCKS_PER_SEC;
        	printf("Time for multiGPU cuFFT plan execution is %f sec\n", cufft_exec_time);
*/


                mmm = cudaStreamSynchronize(stream);
                if (mmm != cudaSuccess) {printf("!!! cudaStreamSynchronize 2 ERROR %d !!!\n", mmm);}

		cudaDeviceSynchronize();

                status = cufftXtMemcpy(plan, fftwgrid_g2, fftwgrid_g, CUFFT_COPY_DEVICE_TO_DEVICE);
                if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMemcpy dtd fftwgrid ERROR %d !!!\n", status);}

                cudaDeviceSynchronize();


                mmm = cudaStreamSynchronize(stream);
                if (mmm != cudaSuccess) {printf("!!! cudaStreamSynchronize 2 ERROR %d !!!\n", mmm);}

                cudaDeviceSynchronize();

                status = cufftXtMemcpy(plan, fftwgrid, fftwgrid_g2, CUFFT_COPY_DEVICE_TO_HOST);
                if (status != CUFFT_SUCCESS) {printf("!!! cufftXtMemcpy dth fftwgrid ERROR %d !!!\n", status);}




		for (int iv=0; iv<yaxis; iv++)
                {
                        for (int iu=0; iu<xaxis; iu++)
                        {
                        	fftwindex2D = iu + iv*xaxis;
                        	fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis);
                        	gridss[fftwindex] = norm*fftwgrid[fftwindex2D].x;
                        	gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].y;
                                /*if (rank == 0)
                                {
                                	fprintf(pFiletestmp0, "fftwgrid[fftwindex2D].x post cuFFT is %f for rank %d\n", fftwgrid[fftwindex2D].x);
                                } else if (rank == 1) {
                                	fprintf(pFiletestmp1, "fftwgrid[fftwindex2D].x post cuFFT is %f for rank %d\n", fftwgrid[fftwindex2D].x);
				}*/
			}
        	}
	}

	fclose(pFiletestmp0);
	fclose(pFiletestmp1);

	status = cufftXtFree(fftwgrid_g);
	if (status != CUFFT_SUCCESS) {printf("!!! cufftXtFree ERROR %d !!!\n", status);}

        status = cufftXtFree(fftwgrid_g2);
        if (status != CUFFT_SUCCESS) {printf("!!! cufftXtFree ERROR %d !!!\n", status);}



	status = cufftDestroy(plan);
	if (status != CUFFT_SUCCESS) {printf("!!! cufftDestroy fftwgrid ERROR %d !!!\n", status);}


	cudaStreamDestroy(stream);
	cudaDeviceSynchronize();

#endif // __CUDACC__
}