Commit d8e2a6a0 authored by Giovanni Lacopo
Browse files

Better scalability of w-correction with OpenMP

parent d7aacba1
Loading
Loading
Loading
Loading
+16 −15
Original line number Original line Diff line number Diff line
@@ -150,19 +150,20 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#else
#else


#ifndef ACCOMP
#ifndef ACCOMP
	
#ifdef _OPENMP
#ifdef _OPENMP
	omp_set_num_threads(num_threads);
	omp_set_num_threads(num_threads);
#endif
#endif
	
	
        #pragma omp parallel for collapse(3) private(wterm)
       #pragma omp parallel for collapse(2) private(wterm) 
	for (int iw=0; iw<num_w_planes; iw++)
	for (int iw=0; iw<num_w_planes; iw++)
	{
	{
	    for (int iv=0; iv<yaxis; iv++)
	    for (int iv=0; iv<yaxis; iv++)
            for (int iu=0; iu<xaxis; iu++)
            for (int iu=0; iu<xaxis; iu++)
            {
            {


		long index = 2*(iu+iv*xaxis+xaxis*yaxis*iw);
		unsigned int index = 2*(iu+iv*xaxis+xaxis*yaxis*iw);
		long img_index = iu+iv*xaxis;
		unsigned int img_index = iu+iv*xaxis;
		wterm = wmin + iw*dw;
		wterm = wmin + iw*dw;
#ifdef PHASE_ON
#ifdef PHASE_ON
                if (num_w_planes > 1)
                if (num_w_planes > 1)
@@ -218,11 +219,11 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#else
#else
       #if !defined(__clang__)
       #if !defined(__clang__)


       #pragma omp target teams distribute parallel for collapse(3) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #pragma omp target teams distribute parallel for collapse(2) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())


       #else
       #else


       #pragma omp target teams distribute parallel for collapse(3) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #pragma omp target teams distribute parallel for collapse(2) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #endif
       #endif
	
	
	for (int iw=0; iw<num_w_planes; iw++)
	for (int iw=0; iw<num_w_planes; iw++)