Commit 9d47e94e authored by Claudio Gheller's avatar Claudio Gheller
Browse files

first CUDA implementation of phase_correction. Still buggy

parent 96c6e08a
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ OPT += -DONE_SIDE
# write the final image
OPT += -DWRITE_IMAGE
# perform w-stacking phase correction
OPT += -DPHASE_ON
#OPT += -DPHASE_ON

CC = gcc
CXX = g++
@@ -46,7 +46,7 @@ serial: $(COBJ)
	$(CC) $(OMP) -o w-stackingCfftw_serial $(CFLAGS) $^ -lm

serial_cuda:
	$(NVCC) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB)
	$(NVCC) $(OPT) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB)
	$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
	$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw_serial w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) -lm

@@ -54,7 +54,7 @@ mpi: $(COBJ)
	$(CC) $(OMP) -o w-stackingCfftw $(CFLAGS) $^ $(LIBS)

mpi_cuda:
	$(NVCC) $(NVFLAGS) -c w-stacking.cu phase_correction.cu $(NVLIB)
	$(NVCC) $(NVFLAGS) $(OPT) -c w-stacking.cu phase_correction.cu $(NVLIB)
	$(CC) $(CFLAGS) $(OPT) -c w-stacking-fftw.c
	$(CXX) $(CFLAGS) $(OPT) -o w-stackingfftw w-stacking-fftw.o w-stacking.o phase_correction.o $(NVLIB) $(LIBS) -lm

+113 −6
Original line number Diff line number Diff line
@@ -7,19 +7,126 @@
#include <stdio.h>

#ifdef __CUDACC__

__global__ void phase_g(int xaxis, 
		        int yaxis,
			int num_w_planes,
			double * gridss,
			double * image_real,
			double * image_imag,
			double wmin,
			double dw,
			double dwnorm,
			int xaxistot,
			int yaxistot,
			double resolution)
{
	long gid = blockIdx.x*blockDim.x + threadIdx.x;
	double add_term_real;
	double add_term_img;
	double wterm;
	long arraysize = xaxis*yaxis*num_w_planes;

	if(gid < arraysize)
	{
		int iw = (int)(gid/(xaxis*yaxis));
		int iv = (int)((gid%(xaxis*yaxis))/xaxis);
		int iu = (iv%yaxis);
		long index = 2*gid;
		long img_index = iu+iv*xaxis;

                wterm = wmin + iw*dw;

#ifdef PHASE_ON
                if (num_w_planes > 1)
                {
                    double xcoord = (double)(iu-xaxistot/2);
                    if(xcoord < 0.0)xcoord = (double)(iu+xaxistot/2);
                    xcoord = sin(xcoord*resolution);
                    double ycoord = (double)(iv-yaxistot/2);
                    if(ycoord < 0.0)ycoord = (double)(iv+yaxistot/2);
                    ycoord = sin(ycoord*resolution);

                    double preal, pimag;
                    double radius2 = (xcoord*xcoord+ycoord*ycoord);

                    preal = cos(2.0*PI*wterm*(sqrt(1-radius2)-1.0));
                    pimag = sin(2.0*PI*wterm*(sqrt(1-radius2)-1.0));

                    double p,q,r,s;
                    p = gridss[index];
                    q = gridss[index+1];
                    r = preal;
                    s = pimag;

                    //printf("%d %d %d %ld %ld\n",iu,iv,iw,index,img_index);

		    add_term_real = (p*r-q*s)*dwnorm*sqrt(1-radius2);
		    add_term_img = (p*s+q*r)*dwnorm*sqrt(1-radius2);
		    atomicAdd(&(image_real[img_index]),add_term_real);
		    atomicAdd(&(image_imag[img_index]),add_term_img);
                } else {
		    atomicAdd(&(image_real[img_index]),gridss[index]);
		    atomicAdd(&(image_imag[img_index]),gridss[index+1]);
                }
#else
		atomicAdd(&(image_real[img_index]),gridss[index]);
		atomicAdd(&(image_imag[img_index]),gridss[index+1]);
#endif // end of PHASE_ON

	}

}

#endif

void phase_correction(double* gridss, double* image_real, double* image_imag, int xaxis, int yaxis, int num_w_planes, int xaxistot, int yaxistot,
		      double resolution, double wmin, double wmax)
{
	double dnum_w_planes = (double)num_w_planes;
	double dxaxistot = (double)xaxistot;
	double dyaxistot = (double)yaxistot;
	double diagonal;
        double dw = (wmax-wmin)/(double)num_w_planes;
	double wterm = wmin+0.5*dw;
	double dwnorm = dw/(wmax-wmin);

#ifdef __CUDACC__

        int Nth = NTHREADS;
        long Nbl = (long)((num_w_planes*xaxis*yaxis)/Nth) + 1;
        if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
        printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);
	

	cudaError_t mmm;
	double * image_real_g;
	double * image_imag_g;
	double * gridss_g;

        mmm=cudaMalloc(&gridss_g, 2*num_w_planes*xaxis*yaxis*sizeof(double));
	mmm=cudaMalloc(&image_real_g, xaxis*yaxis*sizeof(double));
	mmm=cudaMalloc(&image_imag_g, xaxis*yaxis*sizeof(double));

	mmm=cudaMemcpy(gridss_g, gridss, 2*num_w_planes*xaxis*yaxis*sizeof(double), cudaMemcpyHostToDevice);
	mmm=cudaMemset(image_real_g, 0.0, xaxis*yaxis*sizeof(double));
	mmm=cudaMemset(image_imag_g, 0.0, xaxis*yaxis*sizeof(double));

	// call the phase correction kernel
	phase_g <<<Nbl,Nth>>> (xaxis,
                               yaxis,
			       num_w_planes,
                               gridss_g,
                               image_real_g,
                               image_imag_g,
                               wmin,
                               dw,
                               dwnorm,
                               xaxistot,
                               yaxistot,
                               resolution);

	mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);
	mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost);

#else

	for (int iw=0; iw<num_w_planes; iw++)
	{
	    for (int iv=0; iv<yaxis; iv++)
@@ -38,7 +145,6 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
                    if(ycoord < 0.0)ycoord = (double)(iv+yaxistot/2);
		    ycoord = sin(ycoord*resolution);

		    double efact;
		    double preal, pimag;
		    double radius2 = (xcoord*xcoord+ycoord*ycoord);

@@ -61,12 +167,13 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#else
  	        image_real[img_index] += gridss[index];
		image_imag[img_index] += gridss[index+1];
#endif
#endif // end of PHASE_ON

            }  
	    wterm += dw;
	}

#endif // end of __CUDACC__


}
+25 −25
Original line number Diff line number Diff line
@@ -78,24 +78,24 @@ int main(int argc, char * argv[])
	float * visreal;
	float * visimg;

	long Nmeasures;
	long Nvis;
	long Nweights;
	long freq_per_chan;
	long polarisations;
        long Ntimes;
	double dt;
	double thours;
	long baselines;
	double uvmin;
	double uvmax;
	double wmin;
	double wmax;
	long Nmeasures,Nmeasures0;
	long Nvis,Nvis0;
	long Nweights,Nweights0;
	long freq_per_chan,freq_per_chan0;
	long polarisations,polarisations0;
        long Ntimes,Ntimes0;
	double dt,dt0;
	double thours,thours0;
	long baselines,baselines0;
	double uvmin,uvmin0;
	double uvmax,uvmax0;
	double wmin,wmin0;
	double wmax,wmax0;
	double resolution;

        // MESH SIZE
	int grid_size_x = 2048;
	int grid_size_y = 2048;
	int grid_size_x = 256;
	int grid_size_y = 256;
	int local_grid_size_x;// = 8;
	int local_grid_size_y;// = 8;
	int xaxis;
@@ -372,18 +372,18 @@ int main(int argc, char * argv[])
        strcpy(filename,datapath);
        strcat(filename,metafile);
        pFile = fopen (filename,"r");
        fscanf(pFile,"%ld",&Nmeasures);
        fscanf(pFile,"%ld",&Nvis);
        fscanf(pFile,"%ld",&freq_per_chan);
        fscanf(pFile,"%ld",&polarisations);
        fscanf(pFile,"%ld",&Ntimes);
        fscanf(pFile,"%lf",&dt);
        fscanf(pFile,"%lf",&thours);
        fscanf(pFile,"%ld",&baselines);
        fscanf(pFile,"%ld",&Nmeasures0);
        fscanf(pFile,"%ld",&Nvis0);
        fscanf(pFile,"%ld",&freq_per_chan0);
        fscanf(pFile,"%ld",&polarisations0);
        fscanf(pFile,"%ld",&Ntimes0);
        fscanf(pFile,"%lf",&dt0);
        fscanf(pFile,"%lf",&thours0);
        fscanf(pFile,"%ld",&baselines0);
        fscanf(pFile,"%lf",&uvmin);
        fscanf(pFile,"%lf",&uvmax);
        fscanf(pFile,"%lf",&wmin);
        fscanf(pFile,"%lf",&wmax);
        fscanf(pFile,"%lf",&wmin0);
        fscanf(pFile,"%lf",&wmax0);
        fclose(pFile);

        // calculate the resolution in radians
+1 −1
Original line number Diff line number Diff line
@@ -147,7 +147,7 @@ void wstack(
#ifdef __CUDACC__
    // Define the CUDA set up
    int Nth = NTHREADS;
    int Nbl = num_points/Nth + 1;
    long Nbl = (long)(num_points/Nth) + 1;
    if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
    long Nvis = num_points*freq_per_chan*polarizations;
    printf("Running on GPU with %d threads and %d blocks\n",Nth,Nbl);