Commit d8e2a6a0 authored by Giovanni Lacopo
Browse files

Better scalability of w-correction with OpenMP

parent d7aacba1
Loading
Loading
Loading
Loading
+16 −15
Original line number Diff line number Diff line
@@ -150,19 +150,20 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#else

#ifndef ACCOMP
	
#ifdef _OPENMP
	omp_set_num_threads(num_threads);
#endif
	
        #pragma omp parallel for collapse(3) private(wterm)
       #pragma omp parallel for collapse(2) private(wterm) 
	for (int iw=0; iw<num_w_planes; iw++)
	{
	    for (int iv=0; iv<yaxis; iv++)
            for (int iu=0; iu<xaxis; iu++)
            {

		long index = 2*(iu+iv*xaxis+xaxis*yaxis*iw);
		long img_index = iu+iv*xaxis;
		unsigned int index = 2*(iu+iv*xaxis+xaxis*yaxis*iw);
		unsigned int img_index = iu+iv*xaxis;
		wterm = wmin + iw*dw;
#ifdef PHASE_ON
                if (num_w_planes > 1)
@@ -218,11 +219,11 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in
#else
       #if !defined(__clang__)

       #pragma omp target teams distribute parallel for collapse(3) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #pragma omp target teams distribute parallel for collapse(2) simd private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())

       #else

       #pragma omp target teams distribute parallel for collapse(3) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #pragma omp target teams distribute parallel for collapse(2) private(wterm) map(to:gridss[0:2*num_w_planes*xaxis*yaxis]) map(from:image_real[0:xaxis*yaxis]) map(from:image_imag[0:xaxis*yaxis]) device(rank % omp_get_num_devices())
       #endif
	
	for (int iw=0; iw<num_w_planes; iw++)