Commit 0219486d authored by Mulas, Giacomo's avatar Mulas, Giacomo

- update sphere and inclusion to treat the first wavelength on the same footing as the others

parent 52f66298
+4 −0
@@ -313,6 +313,8 @@ public:
  int proc_device;
  //! \brief Refinement mode selection flag.
  int refinemode;
  //! \brief Flag marking the first iteration.
  bool is_first_scale;
  //! \brief Maximum number of refinement iterations.
  int maxrefiters;
  //! \brief Required accuracy level.
@@ -484,6 +486,8 @@ public:
  double **tqss;
  //! \brief Scattering coefficients tensor.
  double ****zpv;
  //! \brief Flag for first-time initialisation.
  bool is_first_scale;
  
  /*! \brief `SphereIterationData` default instance constructor.
   *
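The new is_first_scale member follows a one-shot initialisation pattern: it starts true at construction and the per-wavelength cycle clears it after its first pass. A minimal sketch of the pattern, with hypothetical names (ScaleData, run_scale) not taken from this codebase:

#include <cstdio>

// Hypothetical stand-in for InclusionIterationData / SphereIterationData.
struct ScaleData {
  bool is_first_scale = true;  // set at construction, as in this commit
};

// Hypothetical per-wavelength cycle: does the heavy setup only once.
void run_scale(ScaleData &d, int jxi) {
  if (d.is_first_scale)
    std::printf("scale %d: first-time initialisation\n", jxi);
  d.is_first_scale = false;  // later scales reuse the initialised structures
}

int main() {
  ScaleData d;
  for (int jxi = 1; jxi <= 3; ++jxi) run_scale(d, jxi);
  return 0;
}

Keeping the flag inside the iteration data, rather than deriving it from the loop index as before, is what lets every wavelength go through the same code path while the setup still happens exactly once.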
+51 −42
@@ -278,50 +278,50 @@ void inclusion(const string& config_file, const string& data_file, const string&
      }
      
      // do the first iteration on jxi488 separately, since it seems to be different from the others
      int jxi488 = 1;
      int jxi488;
      int initialmaxrefiters = cid->maxrefiters;
      
      chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
      nvtxRangePush("First iteration");
#endif
//       chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
// #ifdef USE_NVTX
//       nvtxRangePush("First iteration");
// #endif
      // use these pragmas, which should have no effect on parallelism, just to push the OMP nesting to the same level in the first wavelength iteration as in the others
      int jer = 0;
#pragma omp parallel
      {
#pragma omp single
	{
	  jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
	}
      }
#ifdef USE_NVTX
      nvtxRangePop();
#endif
      chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
      elapsed = start_iter_1 - t_start;
      string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
      logger->log(message);
      time_logger->log(message);
      elapsed = end_iter_1 - start_iter_1;
      message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
      logger->log(message);
      time_logger->log(message);
// #pragma omp parallel
//       {
// #pragma omp single
// 	{
// 	  jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
// 	}
//       }
// #ifdef USE_NVTX
//       nvtxRangePop();
// #endif
//       chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
//       elapsed = start_iter_1 - t_start;
//       string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
//       logger->log(message);
//       time_logger->log(message);
//       elapsed = end_iter_1 - start_iter_1;
//       message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
//       logger->log(message);
//       time_logger->log(message);
      /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
      cid->refinemode = 0;
      /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
      // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
      if (jer != 0) {
	// First loop failed. Halt the calculation.
	fclose(timing_file);
	delete time_logger;
	delete p_output;
	delete p_scattering_angles;
	delete cid;
	delete logger;
	delete sconf;
	delete gconf;
	return;
      }
      // cid->refinemode = 0;
      // /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
      // // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
      // if (jer != 0) {
      // 	// First loop failed. Halt the calculation.
      // 	fclose(timing_file);
      // 	delete time_logger;
      // 	delete p_output;
      // 	delete p_scattering_angles;
      // 	delete cid;
      // 	delete logger;
      // 	delete sconf;
      // 	delete gconf;
      // 	return;
      // }

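The pragma wrapper around the single call deserves a note: #pragma omp parallel followed by #pragma omp single lets one thread execute the call, but from inside a parallel region, so the callee observes the same OMP nesting depth as calls made from the main wavelength loop. A standalone sketch of the effect (not from this codebase; build with e.g. g++ -fopenmp):

#include <cstdio>
#include <omp.h>

void cycle() {
  // omp_get_level() reports how many enclosing parallel regions are active,
  // so it shows the nesting depth the callee sees from each call site.
  std::printf("active OMP levels at call site: %d\n", omp_get_level());
}

int main() {
  // Direct call: level 0.
  cycle();
  // Wrapped call: still executed by one thread, but inside one parallel
  // region, matching the depth of calls made from the main wavelength loop.
#pragma omp parallel
  {
#pragma omp single
    cycle();
  }
  return 0;
}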
      //==================================================
      // do the first outputs here, so that the new files are opened here; afterwards I only append
@@ -415,7 +415,7 @@ void inclusion(const string& config_file, const string& data_file, const string&
	}
#pragma omp barrier
	// ok, now I can actually start the parallel calculations
	for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
	for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
	  // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	  int myjxi488 = ixi488+myompthread;
@@ -595,7 +595,7 @@ void inclusion(const string& config_file, const string& data_file, const string&
      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
#pragma omp barrier
      // ok, now I can actually start the parallel calculations
      for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
      for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
	// the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
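With the start index moved from 2 to 1, the first wavelength is distributed like any other: ranks advance in steps of myMPIstride and each OpenMP thread adds its own offset within the block. A sketch of how the indices interleave, with assumed process and thread counts (the real stride and offsets come from the MPI/OMP setup above):

#include <cstdio>

int main() {
  const int number_of_scales = 10;
  const int mpi_procs = 2, omp_threads = 2;            // assumed counts
  const int myMPIstride = mpi_procs * omp_threads;     // indices per outer step
  for (int rank = 0; rank < mpi_procs; ++rank) {
    int myjxi488startoffset = rank * omp_threads;      // this rank's offset
    for (int thread = 0; thread < omp_threads; ++thread) {
      // Starting at 1 now covers the first wavelength like any other.
      for (int ixi488 = 1; ixi488 <= number_of_scales; ixi488 += myMPIstride) {
        int myjxi488 = ixi488 + myjxi488startoffset + thread;
        if (myjxi488 <= number_of_scales)
          std::printf("rank %d thread %d -> scale %d\n", rank, thread, myjxi488);
      }
    }
  }
  return 0;
}

Each scale from 1 to number_of_scales is visited exactly once across all ranks and threads, which is why no separate first-iteration block is needed any more.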
@@ -685,7 +685,9 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
  int jer = 0;
  int lcalc = 0;
  int jaw = 1;
  bool is_first_scale = (jxi488 == 1);
  // this is now part of cid, so don't modify it here; just take a reference to it
  bool &is_first_scale = cid->is_first_scale;
  //bool is_first_scale = (jxi488 == 1);
  // int li = cid->c1->li;
  // int le = cid->c1->le;
  // int lm = cid->c1->lm;
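The reference binding keeps the old local name while making the end-of-iteration write persist in the shared iteration data. A minimal sketch of that aliasing, with a hypothetical Cid stand-in:

#include <cassert>

struct Cid { bool is_first_scale = true; };  // hypothetical stand-in for cid

void cycle(Cid *cid) {
  // The local name aliases the member, so the assignment at the end of the
  // iteration persists into the next call on the same object.
  bool &is_first_scale = cid->is_first_scale;
  is_first_scale = false;
}

int main() {
  Cid c;
  cycle(&c);
  assert(!c.is_first_scale);  // the write went through the reference
  return 0;
}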
@@ -1414,6 +1416,8 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
    } // jph484 loop
    th += sa->thstp;
  } // jth486 loop
  // at the end of this iteration, set is_first_scale to false; the next iteration will reinitialise _only_ if the order changes
  is_first_scale = false;
#ifdef USE_NVTX
  nvtxRangePop();
#endif
@@ -1534,6 +1538,8 @@ InclusionIterationData::InclusionIterationData(GeometryConfiguration *gconf, Sca
  proc_device = 0;
#endif

  // the first time the object is used, it will be a "first iteration", to ensure data structures are properly initialised.
  is_first_scale = true;
  // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
  refinemode = 2;
  // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
@@ -1685,6 +1691,7 @@ InclusionIterationData::InclusionIterationData(const InclusionIterationData& rhs
  extr = rhs.extr;
  
  proc_device = rhs.proc_device;
  is_first_scale = rhs.is_first_scale;
  refinemode = rhs.refinemode;
  maxrefiters = rhs.maxrefiters;
  accuracygoal = rhs.accuracygoal;
@@ -1823,6 +1830,7 @@ InclusionIterationData::InclusionIterationData(const mixMPI *mpidata, const int
  proc_device = 0;
#endif
  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
}
@@ -1888,6 +1896,7 @@ void InclusionIterationData::mpibcast(const mixMPI *mpidata) {
  MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
}
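MPI_C_BOOL is the MPI datatype matching C/C++ bool, so the new broadcasts keep every rank's copy of the flag in sync with rank 0. A self-contained sketch of the call (file name and build line assumed):

// Build with e.g.: mpicxx bcast_flag.cpp && mpirun -n 2 ./a.out
#include <cstdio>
#include <mpi.h>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  // Rank 0's value is propagated to every rank; after the call all copies
  // of the flag agree, which is what the iteration data broadcasts rely on.
  bool is_first_scale = (rank == 0);  // only meaningful on the root
  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
  std::printf("rank %d: is_first_scale = %d\n", rank, is_first_scale);
  MPI_Finalize();
  return 0;
}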
+32 −21
@@ -201,25 +201,25 @@ void sphere(const string& config_file, const string& data_file, const string& ou
      }

      // Do the first wavelength iteration
      int jxi488 = 1;
      // int jxi488 = 1;
      // Use pragmas to push OMP parallelism to the second level.
      int jer = 0;
#pragma omp parallel
      {
#pragma omp single
	{
	  jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp);
	} // OMP single
      } // OMP parallel
      if (jer != 0) { // First iteration failed. Halt the calculation.
	delete p_output;
	delete p_sa;
	delete sid;
	delete logger;
	delete sconf;
	delete gconf;
	return;
      }
// #pragma omp parallel
//       {
// #pragma omp single
// 	{
// 	  jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp);
// 	} // OMP single
//       } // OMP parallel
//       if (jer != 0) { // First iteration failed. Halt the calculation.
// 	delete p_output;
// 	delete p_sa;
// 	delete sid;
// 	delete logger;
// 	delete sconf;
// 	delete gconf;
// 	return;
//       }

      //==================================================
      // do the first outputs here, so that the new files are opened here; afterwards I only append
@@ -310,7 +310,7 @@ void sphere(const string& config_file, const string& data_file, const string& ou
	}
#pragma omp barrier
	// ok, now I can actually start the parallel calculations
	for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
	for (int ixi488=1; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
	  // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	  int myjxi488 = ixi488+myompthread;
@@ -463,7 +463,7 @@ void sphere(const string& config_file, const string& data_file, const string& ou
      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
#pragma omp barrier
      // ok, now I can actually start the parallel calculations
      for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
      for (int ixi488=1; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
	// the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
@@ -546,7 +546,9 @@ int sphere_jxi488_cycle(
  int oindex = 0;
  int jxi = jxi488 + 1;
  int jxindex = jxi - oi->first_xi;
  bool is_first_scale = (jxi == 1);
  // this is now part of sid, so don't modify it here; just take a reference to it
  bool &is_first_scale = sid->is_first_scale;
  //bool is_first_scale = (jxi == 1);
  int nsph = gconf->number_of_spheres;
  //int l_max = gconf->l_max;
  int in_pol = gconf->in_pol;
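The sphere cycle keeps three related indices: the 0-based argument jxi488, the 1-based scale number jxi, and jxindex, the slot inside the output chunk that starts at first_xi. A small sketch of the mapping, with an assumed first_xi:

#include <cstdio>

int main() {
  const int first_xi = 1;  // assumed: this output chunk starts at scale 1
  for (int jxi488 = 0; jxi488 < 3; ++jxi488) {
    int jxi = jxi488 + 1;          // 1-based scale number, as in the cycle above
    int jxindex = jxi - first_xi;  // slot inside the output chunk
    std::printf("jxi488=%d -> scale jxi=%d, output slot %d\n", jxi488, jxi, jxindex);
  }
  return 0;
}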
@@ -889,6 +891,8 @@ int sphere_jxi488_cycle(
    } // jph484 loop on elevation
    th += sa->thstp;
  } // jth486 loop on azimuth
  // at the end of this iteration, set is_first_scale to false; the next iteration will reinitialise _only_ if the order changes
  is_first_scale = false;
  oi->vec_jxi[jxindex] = jxi;
  logger->log("INFO: finished scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n");
  delete logger;
@@ -977,6 +981,7 @@ SphereIterationData::SphereIterationData(
      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
    }
  }
  is_first_scale = true;
}

SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
@@ -1082,6 +1087,7 @@ SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
    }
  }
  is_first_scale = rhs.is_first_scale;
}

SphereIterationData::~SphereIterationData() {
@@ -1207,6 +1213,9 @@ SphereIterationData::SphereIterationData(const mixMPI *mpidata, const int device
  // Collect vectors whose size depend on LM
  vec_zpv = new double[12 * _lm];
  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);

}

int SphereIterationData::mpibcast(const mixMPI *mpidata) {
@@ -1270,6 +1279,8 @@ int SphereIterationData::mpibcast(const mixMPI *mpidata) {
  // Broadcast vectors whose size depend on LM
  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);

  return 0;
}
#endif // MPI_VERSION