Loading allvars_rccl.h +1 −1 Original line number Diff line number Diff line Loading @@ -121,7 +121,7 @@ extern struct meta { myuint Nmeasures; myuint Nvis; myull Nvis; myuint Nweights; myuint freq_per_chan; myuint polarisations; Loading allvars_rccl.hip.hpp +14 −14 Original line number Diff line number Diff line Loading @@ -71,8 +71,8 @@ typedef double float_t; typedef float float_t; #endif typedef unsigned int uint; typedef unsigned long long ull; typedef unsigned int myuint; typedef unsigned long long myull; extern struct io Loading Loading @@ -115,15 +115,15 @@ extern struct op extern struct meta { uint Nmeasures; uint Nvis; uint Nweights; uint freq_per_chan; uint polarisations; uint Ntimes; myuint Nmeasures; myull Nvis; myuint Nweights; myuint freq_per_chan; myuint polarisations; myuint Ntimes; double dt; double thours; uint baselines; myuint baselines; double uvmin; double uvmax; double wmin; Loading Loading @@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN]; extern int xaxis, yaxis; extern int rank; extern int size; extern uint nsectors; extern uint startrow; extern myuint nsectors; extern myuint startrow; extern double_t resolution, dx, dw, w_supporth; extern uint **sectorarray; extern uint *histo_send; extern myuint **sectorarray; extern myuint *histo_send; extern int verbose_level; extern uint size_of_grid; extern myuint size_of_grid; extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu; extern MPI_Comm MYMPI_COMM_WORLD; Loading w-stacking.cu +6 −6 Original line number Diff line number Diff line Loading @@ -134,7 +134,7 @@ __global__ void convolve_g( if(gid < num_points) { myuint i = gid; unsigned long visindex = i*freq_per_chan*polarizations; myull visindex = i*freq_per_chan*polarizations; double norm = std22/PI; int j, k; Loading Loading @@ -182,7 +182,7 @@ __global__ void convolve_g( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; unsigned long ifine = visindex; myull ifine = visindex; for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { myuint iweight = visindex/freq_per_chan; Loading Loading @@ -245,7 +245,7 @@ void wstack( { myuint i; //myuint index; unsigned long visindex; myull visindex; // initialize the convolution kernel // gaussian: Loading Loading @@ -274,7 +274,7 @@ void wstack( int Nth = NTHREADS; myuint Nbl = (myuint)(num_points/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; unsigned long Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; int ndevices; cudaGetDeviceCount(&ndevices); Loading Loading @@ -441,7 +441,7 @@ void wstack( #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) Loading Loading @@ -507,7 +507,7 @@ void wstack( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; unsigned long ifine = visindex; myull ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { Loading w-stacking.hip.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -130,7 +130,7 @@ __global__ void convolve_g( if(gid < num_points) { myuint i = gid; myuint visindex = i*freq_per_chan*polarizations; myull visindex = i*freq_per_chan*polarizations; double norm = std22/PI; int j, k; Loading Loading @@ -178,7 +178,7 @@ __global__ void convolve_g( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; myuint ifine = visindex; myull ifine = visindex; for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { myuint iweight = visindex/freq_per_chan; Loading Loading @@ -233,7 +233,7 @@ void wstack( { myuint i; //myuint index; myuint visindex; myull visindex; // initialize the convolution kernel // gaussian: Loading Loading @@ -262,7 +262,7 @@ void wstack( int Nth = NTHREADS; myuint Nbl = (myuint)(num_points/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; int ndevices; int num = hipGetDeviceCount(&ndevices); Loading Loading @@ -404,7 +404,7 @@ void wstack( #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) Loading Loading @@ -470,7 +470,7 @@ void wstack( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; myuint ifine = visindex; myull ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { Loading Loading
allvars_rccl.h +1 −1 Original line number Diff line number Diff line Loading @@ -121,7 +121,7 @@ extern struct meta { myuint Nmeasures; myuint Nvis; myull Nvis; myuint Nweights; myuint freq_per_chan; myuint polarisations; Loading
allvars_rccl.hip.hpp +14 −14 Original line number Diff line number Diff line Loading @@ -71,8 +71,8 @@ typedef double float_t; typedef float float_t; #endif typedef unsigned int uint; typedef unsigned long long ull; typedef unsigned int myuint; typedef unsigned long long myull; extern struct io Loading Loading @@ -115,15 +115,15 @@ extern struct op extern struct meta { uint Nmeasures; uint Nvis; uint Nweights; uint freq_per_chan; uint polarisations; uint Ntimes; myuint Nmeasures; myull Nvis; myuint Nweights; myuint freq_per_chan; myuint polarisations; myuint Ntimes; double dt; double thours; uint baselines; myuint baselines; double uvmin; double uvmax; double wmin; Loading Loading @@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN]; extern int xaxis, yaxis; extern int rank; extern int size; extern uint nsectors; extern uint startrow; extern myuint nsectors; extern myuint startrow; extern double_t resolution, dx, dw, w_supporth; extern uint **sectorarray; extern uint *histo_send; extern myuint **sectorarray; extern myuint *histo_send; extern int verbose_level; extern uint size_of_grid; extern myuint size_of_grid; extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu; extern MPI_Comm MYMPI_COMM_WORLD; Loading
w-stacking.cu +6 −6 Original line number Diff line number Diff line Loading @@ -134,7 +134,7 @@ __global__ void convolve_g( if(gid < num_points) { myuint i = gid; unsigned long visindex = i*freq_per_chan*polarizations; myull visindex = i*freq_per_chan*polarizations; double norm = std22/PI; int j, k; Loading Loading @@ -182,7 +182,7 @@ __global__ void convolve_g( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; unsigned long ifine = visindex; myull ifine = visindex; for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { myuint iweight = visindex/freq_per_chan; Loading Loading @@ -245,7 +245,7 @@ void wstack( { myuint i; //myuint index; unsigned long visindex; myull visindex; // initialize the convolution kernel // gaussian: Loading Loading @@ -274,7 +274,7 @@ void wstack( int Nth = NTHREADS; myuint Nbl = (myuint)(num_points/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; unsigned long Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; int ndevices; cudaGetDeviceCount(&ndevices); Loading Loading @@ -441,7 +441,7 @@ void wstack( #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) Loading Loading @@ -507,7 +507,7 @@ void wstack( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; unsigned long ifine = visindex; myull ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { Loading
w-stacking.hip.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -130,7 +130,7 @@ __global__ void convolve_g( if(gid < num_points) { myuint i = gid; myuint visindex = i*freq_per_chan*polarizations; myull visindex = i*freq_per_chan*polarizations; double norm = std22/PI; int j, k; Loading Loading @@ -178,7 +178,7 @@ __global__ void convolve_g( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; myuint ifine = visindex; myull ifine = visindex; for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { myuint iweight = visindex/freq_per_chan; Loading Loading @@ -233,7 +233,7 @@ void wstack( { myuint i; //myuint index; myuint visindex; myull visindex; // initialize the convolution kernel // gaussian: Loading Loading @@ -262,7 +262,7 @@ void wstack( int Nth = NTHREADS; myuint Nbl = (myuint)(num_points/Nth) + 1; if(NWORKERS == 1) {Nbl = 1; Nth = 1;}; myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; int ndevices; int num = hipGetDeviceCount(&ndevices); Loading Loading @@ -404,7 +404,7 @@ void wstack( #if defined(ACCOMP) && (GPU_STACKING) omp_set_default_device(rank % omp_get_num_devices()); myuint Nvis = num_points*freq_per_chan*polarizations; myull Nvis = num_points*freq_per_chan*polarizations; #pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y]) #else #pragma omp parallel for private(visindex) Loading Loading @@ -470,7 +470,7 @@ void wstack( // Loops over frequencies and polarizations double add_term_real = 0.0; double add_term_img = 0.0; myuint ifine = visindex; myull ifine = visindex; // DAV: the following two loops are performend by each thread separately: no problems of race conditions for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++) { Loading