Loading fourier_transform_new.cppdeleted 100755 → 0 +0 −315 Original line number Diff line number Diff line #include <stdio.h> #include "allvars.h" #include "proto.h" #include <vector> #include <complex> #include <iostream> #ifdef CUFFTMP #include <cufftMp.h> #include <cuda_runtime.h> #endif void fftw_data(){ #ifdef USE_FFTW // FFT transform the data (using distributed FFTW) if(rank == 0)printf("PERFORMING FFT\n"); clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); #ifndef CUFFTMP fftw_plan plan; fftw_complex *fftwgrid; ptrdiff_t alloc_local, local_n0, local_0_start; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) // x is the direction of contiguous data and maps to the second parameter // y is the parallelized direction and corresponds to the first parameter (--> n0) // and perform the FFT per w plane alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); fftwgrid = fftw_alloc_complex(alloc_local); plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D][0] = grid[fftwindex]; fftwgrid[fftwindex2D][1] = grid[fftwindex+1]; } } // do the transform for each w-plane fftw_execute(plan); // save the transformed w-plane for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0]; gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1]; } } } fftw_destroy_plan(plan); fftw_free(fftwgrid); //Implementation of cufftMp #else //Select the default device per each MPI task int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); //long my_start = rank*( param.grid_size_y/size ); //int my_ystart = rank*yaxis; //int my_yend = (rank+1)*yaxis; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); //Choose the communicator and allocate the vector MPI_Comm comm = MPI_COMM_WORLD; size_t Ny = param.grid_size_y; size_t Nx = param.grid_size_x; std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx ); cufftHandle plan = 0; cudaStream_t stream = nullptr; cudaStreamCreate(&stream); //Create the plan for the FFT cufftCreate(&plan); size_t worksize; //Domain decomposition along the y-axis cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); cufftSetStream(plan, stream); cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]}; } } // do the transform for each w-plane //printf("Task %d: ffting the %d plane\n", rank, iw); cudaLibXtDesc *desc, *desc_mean; cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE); cufftXtMalloc(plan, &desc_mean, CUFFT_XT_FORMAT_INPLACE); cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE); cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE); cudaStreamSynchronize(stream); cufftXtMemcpy (plan, desc_mean, desc, CUFFT_COPY_DEVICE_TO_DEVICE); cufftXtMemcpy(plan, fftwgrid.data(), desc_mean, CUFFT_COPY_DEVICE_TO_HOST); cufftXtFree(desc); cufftXtFree(desc_mean); for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real(); gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag(); } } } cufftDestroy(plan); #endif //close ifndef ACCOMP #ifdef ONE_SIDE MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #else MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); #endif } void write_fftw_data(){ #ifdef USE_FFTW #ifdef WRITE_DATA // Write results #ifdef USE_MPI MPI_Win writewin; MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); MPI_Win_fence(0,writewin); #endif if (rank == 0) { printf("WRITING FFT TRANSFORMED DATA\n"); file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); #ifdef USE_MPI for (int isector=0; isector<nsectors; isector++) { MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin); MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin); MPI_Win_unlock(isector,writewin); for (long i=0; i<size_of_grid/2; i++) { gridss_real[i] = gridss_w[2*i]; gridss_img[i] = gridss_w[2*i+1]; } if (param.num_w_planes > 1) { for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal); } for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg); } } else { fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal); fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg); } } #else /* for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<grid_size_y; iv++) for (int iu=0; iu<grid_size_x; iu++) { int isector = 0; long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); } */ #endif fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Win_fence(0,writewin); MPI_Win_free(&writewin); MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_DATA // Phase correction clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.phase_time1 = (finish.tv_sec - begin.tv_sec); timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; #ifdef WRITE_IMAGE if(rank == 0) { file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); file.pFilereal = fopen (out.fftfile2,"ab"); file.pFileimg = fopen (out.fftfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); fseek(file.pFilereal, global_index, SEEK_SET); fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal); fseek(file.pFileimg, global_index, SEEK_SET); fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg); fclose(file.pFilereal); fclose(file.pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE #endif //FFTW } fourier_transform_test.cppdeleted 100755 → 0 +0 −313 Original line number Diff line number Diff line #include <stdio.h> #include "allvars.h" #include "proto.h" #include <vector> #include <complex> #include <iostream> #ifdef CUFFTMP #include <cufftMp.h> #include <cuda_runtime.h> #endif void fftw_data(){ #ifdef USE_FFTW // FFT transform the data (using distributed FFTW) if(rank == 0)printf("PERFORMING FFT\n"); clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); #ifndef CUFFTMP fftw_plan plan; fftw_complex *fftwgrid; ptrdiff_t alloc_local, local_n0, local_0_start; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) // x is the direction of contiguous data and maps to the second parameter // y is the parallelized direction and corresponds to the first parameter (--> n0) // and perform the FFT per w plane alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); fftwgrid = fftw_alloc_complex(alloc_local); plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D][0] = grid[fftwindex]; fftwgrid[fftwindex2D][1] = grid[fftwindex+1]; } } // do the transform for each w-plane fftw_execute(plan); // save the transformed w-plane for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0]; gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1]; } } } fftw_destroy_plan(plan); fftw_free(fftwgrid); //Implementation of cufftMp #else //Select the default device per each MPI task int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); //long my_start = rank*( param.grid_size_y/size ); //int my_ystart = rank*yaxis; //int my_yend = (rank+1)*yaxis; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); //Choose the communicator and allocate the vector MPI_Comm comm = MPI_COMM_WORLD; size_t Ny = param.grid_size_y; size_t Nx = param.grid_size_x; std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx ); cufftHandle plan = 0; cudaStream_t stream = nullptr; cudaStreamCreate(&stream); //Create the plan for the FFT cufftCreate(&plan); size_t worksize; //Domain decomposition along the y-axis cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); cufftSetStream(plan, stream); cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]}; } } // do the transform for each w-plane //printf("Task %d: ffting the %d plane\n", rank, iw); cudaLibXtDesc *desc; cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE); cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE); cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE); cudaStreamSynchronize(stream); cufftXtMemcpy (plan, desc, desc, CUFFT_COPY_DEVICE_TO_DEVICE); cufftXtMemcpy(plan, fftwgrid.data(), desc, CUFFT_COPY_DEVICE_TO_HOST); cufftXtFree(desc); for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real(); gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag(); } } } cufftDestroy(plan); #endif //close ifndef ACCOMP #ifdef ONE_SIDE MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #else MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); #endif } void write_fftw_data(){ #ifdef USE_FFTW #ifdef WRITE_DATA // Write results #ifdef USE_MPI MPI_Win writewin; MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); MPI_Win_fence(0,writewin); #endif if (rank == 0) { printf("WRITING FFT TRANSFORMED DATA\n"); file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); #ifdef USE_MPI for (int isector=0; isector<nsectors; isector++) { MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin); MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin); MPI_Win_unlock(isector,writewin); for (long i=0; i<size_of_grid/2; i++) { gridss_real[i] = gridss_w[2*i]; gridss_img[i] = gridss_w[2*i+1]; } if (param.num_w_planes > 1) { for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal); } for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg); } } else { fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal); fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg); } } #else /* for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<grid_size_y; iv++) for (int iu=0; iu<grid_size_x; iu++) { int isector = 0; long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); } */ #endif fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Win_fence(0,writewin); MPI_Win_free(&writewin); MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_DATA // Phase correction clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.phase_time1 = (finish.tv_sec - begin.tv_sec); timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; #ifdef WRITE_IMAGE if(rank == 0) { file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); file.pFilereal = fopen (out.fftfile2,"ab"); file.pFileimg = fopen (out.fftfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); fseek(file.pFilereal, global_index, SEEK_SET); fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal); fseek(file.pFileimg, global_index, SEEK_SET); fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg); fclose(file.pFilereal); fclose(file.pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE #endif //FFTW } Loading
fourier_transform_new.cppdeleted 100755 → 0 +0 −315 Original line number Diff line number Diff line #include <stdio.h> #include "allvars.h" #include "proto.h" #include <vector> #include <complex> #include <iostream> #ifdef CUFFTMP #include <cufftMp.h> #include <cuda_runtime.h> #endif void fftw_data(){ #ifdef USE_FFTW // FFT transform the data (using distributed FFTW) if(rank == 0)printf("PERFORMING FFT\n"); clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); #ifndef CUFFTMP fftw_plan plan; fftw_complex *fftwgrid; ptrdiff_t alloc_local, local_n0, local_0_start; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) // x is the direction of contiguous data and maps to the second parameter // y is the parallelized direction and corresponds to the first parameter (--> n0) // and perform the FFT per w plane alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); fftwgrid = fftw_alloc_complex(alloc_local); plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D][0] = grid[fftwindex]; fftwgrid[fftwindex2D][1] = grid[fftwindex+1]; } } // do the transform for each w-plane fftw_execute(plan); // save the transformed w-plane for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0]; gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1]; } } } fftw_destroy_plan(plan); fftw_free(fftwgrid); //Implementation of cufftMp #else //Select the default device per each MPI task int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); //long my_start = rank*( param.grid_size_y/size ); //int my_ystart = rank*yaxis; //int my_yend = (rank+1)*yaxis; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); //Choose the communicator and allocate the vector MPI_Comm comm = MPI_COMM_WORLD; size_t Ny = param.grid_size_y; size_t Nx = param.grid_size_x; std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx ); cufftHandle plan = 0; cudaStream_t stream = nullptr; cudaStreamCreate(&stream); //Create the plan for the FFT cufftCreate(&plan); size_t worksize; //Domain decomposition along the y-axis cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); cufftSetStream(plan, stream); cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]}; } } // do the transform for each w-plane //printf("Task %d: ffting the %d plane\n", rank, iw); cudaLibXtDesc *desc, *desc_mean; cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE); cufftXtMalloc(plan, &desc_mean, CUFFT_XT_FORMAT_INPLACE); cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE); cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE); cudaStreamSynchronize(stream); cufftXtMemcpy (plan, desc_mean, desc, CUFFT_COPY_DEVICE_TO_DEVICE); cufftXtMemcpy(plan, fftwgrid.data(), desc_mean, CUFFT_COPY_DEVICE_TO_HOST); cufftXtFree(desc); cufftXtFree(desc_mean); for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real(); gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag(); } } } cufftDestroy(plan); #endif //close ifndef ACCOMP #ifdef ONE_SIDE MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #else MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); #endif } void write_fftw_data(){ #ifdef USE_FFTW #ifdef WRITE_DATA // Write results #ifdef USE_MPI MPI_Win writewin; MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); MPI_Win_fence(0,writewin); #endif if (rank == 0) { printf("WRITING FFT TRANSFORMED DATA\n"); file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); #ifdef USE_MPI for (int isector=0; isector<nsectors; isector++) { MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin); MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin); MPI_Win_unlock(isector,writewin); for (long i=0; i<size_of_grid/2; i++) { gridss_real[i] = gridss_w[2*i]; gridss_img[i] = gridss_w[2*i+1]; } if (param.num_w_planes > 1) { for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal); } for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg); } } else { fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal); fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg); } } #else /* for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<grid_size_y; iv++) for (int iu=0; iu<grid_size_x; iu++) { int isector = 0; long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); } */ #endif fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Win_fence(0,writewin); MPI_Win_free(&writewin); MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_DATA // Phase correction clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.phase_time1 = (finish.tv_sec - begin.tv_sec); timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; #ifdef WRITE_IMAGE if(rank == 0) { file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); file.pFilereal = fopen (out.fftfile2,"ab"); file.pFileimg = fopen (out.fftfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); fseek(file.pFilereal, global_index, SEEK_SET); fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal); fseek(file.pFileimg, global_index, SEEK_SET); fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg); fclose(file.pFilereal); fclose(file.pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE #endif //FFTW }
fourier_transform_test.cppdeleted 100755 → 0 +0 −313 Original line number Diff line number Diff line #include <stdio.h> #include "allvars.h" #include "proto.h" #include <vector> #include <complex> #include <iostream> #ifdef CUFFTMP #include <cufftMp.h> #include <cuda_runtime.h> #endif void fftw_data(){ #ifdef USE_FFTW // FFT transform the data (using distributed FFTW) if(rank == 0)printf("PERFORMING FFT\n"); clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); #ifndef CUFFTMP fftw_plan plan; fftw_complex *fftwgrid; ptrdiff_t alloc_local, local_n0, local_0_start; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); // map the 1D array of complex visibilities to a 2D array required by FFTW (complex[*][2]) // x is the direction of contiguous data and maps to the second parameter // y is the parallelized direction and corresponds to the first parameter (--> n0) // and perform the FFT per w plane alloc_local = fftw_mpi_local_size_2d(param.grid_size_y, param.grid_size_x, MPI_COMM_WORLD,&local_n0, &local_0_start); fftwgrid = fftw_alloc_complex(alloc_local); plan = fftw_mpi_plan_dft_2d(param.grid_size_y, param.grid_size_x, fftwgrid, fftwgrid, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D][0] = grid[fftwindex]; fftwgrid[fftwindex2D][1] = grid[fftwindex+1]; } } // do the transform for each w-plane fftw_execute(plan); // save the transformed w-plane for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D][0]; gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D][1]; } } } fftw_destroy_plan(plan); fftw_free(fftwgrid); //Implementation of cufftMp #else //Select the default device per each MPI task int ndevices; cudaGetDeviceCount(&ndevices); cudaSetDevice(rank % ndevices); //long my_start = rank*( param.grid_size_y/size ); //int my_ystart = rank*yaxis; //int my_yend = (rank+1)*yaxis; double norm = 1.0/(double)(param.grid_size_x*param.grid_size_y); //Choose the communicator and allocate the vector MPI_Comm comm = MPI_COMM_WORLD; size_t Ny = param.grid_size_y; size_t Nx = param.grid_size_x; std::vector<std::complex<double>> fftwgrid( (Ny/size) * Nx ); cufftHandle plan = 0; cudaStream_t stream = nullptr; cudaStreamCreate(&stream); //Create the plan for the FFT cufftCreate(&plan); size_t worksize; //Domain decomposition along the y-axis cufftMpAttachComm(plan, CUFFT_COMM_MPI, &comm); cufftSetStream(plan, stream); cufftMakePlan2d(plan, Ny, Nx, CUFFT_Z2Z, &worksize); long fftwindex = 0; long fftwindex2D = 0; for (int iw=0; iw<param.num_w_planes; iw++) { //printf("FFTing plan %d\n",iw); //select the w-plane to transform for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); fftwgrid[fftwindex2D] = {grid[fftwindex], grid[fftwindex+1]}; } } // do the transform for each w-plane //printf("Task %d: ffting the %d plane\n", rank, iw); cudaLibXtDesc *desc; cufftXtMalloc(plan, &desc, CUFFT_XT_FORMAT_INPLACE); cufftXtMemcpy(plan, desc, fftwgrid.data(), CUFFT_COPY_HOST_TO_DEVICE); cufftXtExecDescriptor(plan, desc, desc, CUFFT_INVERSE); cudaStreamSynchronize(stream); cufftXtMemcpy (plan, desc, desc, CUFFT_COPY_DEVICE_TO_DEVICE); cufftXtMemcpy(plan, fftwgrid.data(), desc, CUFFT_COPY_DEVICE_TO_HOST); cufftXtFree(desc); for (int iv=0; iv<yaxis; iv++) { for (int iu=0; iu<xaxis; iu++) { fftwindex2D = iu + iv*xaxis; fftwindex = 2*(fftwindex2D + iw*xaxis*yaxis); gridss[fftwindex] = norm*fftwgrid[fftwindex2D].real(); gridss[fftwindex+1] = norm*fftwgrid[fftwindex2D].imag(); } } } cufftDestroy(plan); #endif //close ifndef ACCOMP #ifdef ONE_SIDE MPI_Win_fence(0,slabwin); MPI_Barrier(MPI_COMM_WORLD); #else MPI_Barrier(MPI_COMM_WORLD); #endif end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.fftw_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.fftw_time1 = (finish.tv_sec - begin.tv_sec); timing.fftw_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; clock_gettime(CLOCK_MONOTONIC, &begin); #endif } void write_fftw_data(){ #ifdef USE_FFTW #ifdef WRITE_DATA // Write results #ifdef USE_MPI MPI_Win writewin; MPI_Win_create(gridss, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &writewin); MPI_Win_fence(0,writewin); #endif if (rank == 0) { printf("WRITING FFT TRANSFORMED DATA\n"); file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); #ifdef USE_MPI for (int isector=0; isector<nsectors; isector++) { MPI_Win_lock(MPI_LOCK_SHARED,isector,0,writewin); MPI_Get(gridss_w,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,writewin); MPI_Win_unlock(isector,writewin); for (long i=0; i<size_of_grid/2; i++) { gridss_real[i] = gridss_w[2*i]; gridss_img[i] = gridss_w[2*i+1]; } if (param.num_w_planes > 1) { for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFilereal, global_index, SEEK_SET); fwrite(&gridss_real[index], 1, sizeof(double), file.pFilereal); } for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<yaxis; iv++) for (int iu=0; iu<xaxis; iu++) { long global_index = (iu + (iv+isector*yaxis)*xaxis + iw*param.grid_size_x*param.grid_size_y)*sizeof(double); long index = iu + iv*xaxis + iw*xaxis*yaxis; fseek(file.pFileimg, global_index, SEEK_SET); fwrite(&gridss_img[index], 1, sizeof(double), file.pFileimg); } } else { fwrite(gridss_real, size_of_grid/2, sizeof(double), file.pFilereal); fwrite(gridss_img, size_of_grid/2, sizeof(double), file.pFileimg); } } #else /* for (int iw=0; iw<param.num_w_planes; iw++) for (int iv=0; iv<grid_size_y; iv++) for (int iu=0; iu<grid_size_x; iu++) { int isector = 0; long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y); double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]); fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm); } */ #endif fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Win_fence(0,writewin); MPI_Win_free(&writewin); MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_DATA // Phase correction clock_gettime(CLOCK_MONOTONIC, &begin); start = clock(); if(rank == 0)printf("PHASE CORRECTION\n"); double* image_real = (double*) calloc(xaxis*yaxis,sizeof(double)); double* image_imag = (double*) calloc(xaxis*yaxis,sizeof(double)); phase_correction(gridss,image_real,image_imag,xaxis,yaxis,param.num_w_planes,param.grid_size_x,param.grid_size_y,resolution,metaData.wmin,metaData.wmax,param.num_threads,rank); end = clock(); clock_gettime(CLOCK_MONOTONIC, &finish); timing.phase_time = ((double) (end - start)) / CLOCKS_PER_SEC; timing.phase_time1 = (finish.tv_sec - begin.tv_sec); timing.phase_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0; #ifdef WRITE_IMAGE if(rank == 0) { file.pFilereal = fopen (out.fftfile2,"wb"); file.pFileimg = fopen (out.fftfile3,"wb"); fclose(file.pFilereal); fclose(file.pFileimg); } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(rank == 0)printf("WRITING IMAGE\n"); for (int isector=0; isector<size; isector++) { #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(isector == rank) { printf("%d writing\n",isector); file.pFilereal = fopen (out.fftfile2,"ab"); file.pFileimg = fopen (out.fftfile3,"ab"); long global_index = isector*(xaxis*yaxis)*sizeof(double); fseek(file.pFilereal, global_index, SEEK_SET); fwrite(image_real, xaxis*yaxis, sizeof(double), file.pFilereal); fseek(file.pFileimg, global_index, SEEK_SET); fwrite(image_imag, xaxis*yaxis, sizeof(double), file.pFileimg); fclose(file.pFilereal); fclose(file.pFileimg); } } #ifdef USE_MPI MPI_Barrier(MPI_COMM_WORLD); #endif #endif //WRITE_IMAGE #endif //FFTW }