Fixed data types and print intermediate data (AMD & CUDA version) (cd33eaa6) · Commits · Claudio Gheller / HPC_Imaging

allvars_rccl.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -121,7 +121,7 @@ extern struct meta
		{

		myuint Nmeasures;
		myuint Nvis;
		myull Nvis;
		myuint Nweights;
		myuint freq_per_chan;
		myuint polarisations;

allvars_rccl.hip.hpp

+14 −14

Original line number	Diff line number	Diff line
		@@ -71,8 +71,8 @@ typedef double float_t;
		typedef float float_t;
		#endif

		typedef unsigned int uint;
		typedef unsigned long long ull;
		typedef unsigned int myuint;
		typedef unsigned long long myull;


		extern struct io
		@@ -115,15 +115,15 @@ extern struct op
		extern struct meta
		{

		uint Nmeasures;
		uint Nvis;
		uint Nweights;
		uint freq_per_chan;
		uint polarisations;
		uint Ntimes;
		myuint Nmeasures;
		myull Nvis;
		myuint Nweights;
		myuint freq_per_chan;
		myuint polarisations;
		myuint Ntimes;
		double dt;
		double thours;
		uint baselines;
		myuint baselines;
		double uvmin;
		double uvmax;
		double wmin;
		@@ -159,16 +159,16 @@ extern char datapath[LONGNAME_LEN];
		extern int xaxis, yaxis;
		extern int rank;
		extern int size;
		extern uint nsectors;
		extern uint startrow;
		extern myuint nsectors;
		extern myuint startrow;
		extern double_t resolution, dx, dw, w_supporth;

		extern uint **sectorarray;
		extern uint *histo_send;
		extern myuint **sectorarray;
		extern myuint *histo_send;
		extern int verbose_level;


		extern uint size_of_grid;
		extern myuint size_of_grid;
		extern double_t *grid_pointers, *grid, *gridss, *gridss_real, *gridss_img, *gridss_w, *grid_gpu, *gridss_gpu;

		extern MPI_Comm MYMPI_COMM_WORLD;

w-stacking.cu

+6 −6

Original line number	Diff line number	Diff line
		@@ -134,7 +134,7 @@ __global__ void convolve_g(
		if(gid < num_points)
		{
		myuint i = gid;
		unsigned long visindex = i*freq_per_chan*polarizations;
		myull visindex = i*freq_per_chan*polarizations;
		double norm = std22/PI;

		int j, k;
		@@ -182,7 +182,7 @@ __global__ void convolve_g(
		// Loops over frequencies and polarizations
		double add_term_real = 0.0;
		double add_term_img = 0.0;
		unsigned long ifine = visindex;
		myull ifine = visindex;
		for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{
		myuint iweight = visindex/freq_per_chan;
		@@ -245,7 +245,7 @@ void wstack(
		{
		myuint i;
		//myuint index;
		unsigned long visindex;
		myull visindex;

		// initialize the convolution kernel
		// gaussian:
		@@ -274,7 +274,7 @@ void wstack(
		int Nth = NTHREADS;
		myuint Nbl = (myuint)(num_points/Nth) + 1;
		if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
		unsigned long Nvis = num_points*freq_per_chan*polarizations;
		myull Nvis = num_points*freq_per_chan*polarizations;

		int ndevices;
		cudaGetDeviceCount(&ndevices);
		@@ -441,7 +441,7 @@ void wstack(

		#if defined(ACCOMP) && (GPU_STACKING)
		omp_set_default_device(rank % omp_get_num_devices());
		myuint Nvis = num_points*freq_per_chan*polarizations;
		myull Nvis = num_points*freq_per_chan*polarizations;
		#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
		#else
		#pragma omp parallel for private(visindex)
		@@ -507,7 +507,7 @@ void wstack(
		// Loops over frequencies and polarizations
		double add_term_real = 0.0;
		double add_term_img = 0.0;
		unsigned long ifine = visindex;
		myull ifine = visindex;
		// DAV: the following two loops are performed by each thread separately: no problems of race conditions
		for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{

w-stacking.hip.cpp

+6 −6

Original line number	Diff line number	Diff line
		@@ -130,7 +130,7 @@ __global__ void convolve_g(
		if(gid < num_points)
		{
		myuint i = gid;
		myuint visindex = i*freq_per_chan*polarizations;
		myull visindex = i*freq_per_chan*polarizations;
		double norm = std22/PI;

		int j, k;
		@@ -178,7 +178,7 @@ __global__ void convolve_g(
		// Loops over frequencies and polarizations
		double add_term_real = 0.0;
		double add_term_img = 0.0;
		myuint ifine = visindex;
		myull ifine = visindex;
		for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{
		myuint iweight = visindex/freq_per_chan;
		@@ -233,7 +233,7 @@ void wstack(
		{
		myuint i;
		//myuint index;
		myuint visindex;
		myull visindex;

		// initialize the convolution kernel
		// gaussian:
		@@ -262,7 +262,7 @@ void wstack(
		int Nth = NTHREADS;
		myuint Nbl = (myuint)(num_points/Nth) + 1;
		if(NWORKERS == 1) {Nbl = 1; Nth = 1;};
		myuint Nvis = num_points*freq_per_chan*polarizations;
		myull Nvis = num_points*freq_per_chan*polarizations;

		int ndevices;
		int num = hipGetDeviceCount(&ndevices);
		@@ -404,7 +404,7 @@ void wstack(

		#if defined(ACCOMP) && (GPU_STACKING)
		omp_set_default_device(rank % omp_get_num_devices());
		myuint Nvis = num_points*freq_per_chan*polarizations;
		myull Nvis = num_points*freq_per_chan*polarizations;
		#pragma omp target teams distribute parallel for private(visindex) map(to:uu[0:num_points], vv[0:num_points], ww[0:num_points], vis_real[0:Nvis], vis_img[0:Nvis], weight[0:Nvis/freq_per_chan]) map(tofrom:grid[0:2*num_w_planes*grid_size_x*grid_size_y])
		#else
		#pragma omp parallel for private(visindex)
		@@ -470,7 +470,7 @@ void wstack(
		// Loops over frequencies and polarizations
		double add_term_real = 0.0;
		double add_term_img = 0.0;
		myuint ifine = visindex;
		myull ifine = visindex;
		// DAV: the following two loops are performed by each thread separately: no problems of race conditions
		for (myuint ifreq=0; ifreq<freq_per_chan; ifreq++)
		{