Commit 0219486d authored by Mulas, Giacomo's avatar Mulas, Giacomo

- update sphere and inclusion to treat the first wavelength on the same footing as the others

parent 52f66298
+4 −0
@@ -313,6 +313,8 @@ public:
  int proc_device;
  //! \brief Refinement mode selection flag.
  int refinemode;
  //! \brief Flag marking the first iteration.
  bool is_first_scale;
  //! \brief Maximum number of refinement iterations.
  int maxrefiters;
  //! \brief Required accuracy level.
@@ -484,6 +486,8 @@ public:
  double **tqss;
  //! \brief Scattering coefficients tensor.
  double ****zpv;
  //! \brief Flag for first-time initialisation.
  bool is_first_scale;
  
  /*! \brief `SphereIterationData` default instance constructor.
   *
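The new is_first_scale member follows a one-shot initialisation pattern: it starts true at construction and the per-wavelength cycle clears it after its first pass. A minimal sketch of the pattern, with hypothetical names (ScaleData, run_scale) not taken from this codebase:

#include <cstdio>

// Hypothetical stand-in for InclusionIterationData / SphereIterationData.
struct ScaleData {
  bool is_first_scale = true;  // set at construction, as in this commit
};

// Hypothetical per-wavelength cycle: does the heavy setup only once.
void run_scale(ScaleData &d, int jxi) {
  if (d.is_first_scale)
    std::printf("scale %d: first-time initialisation\n", jxi);
  d.is_first_scale = false;  // later scales reuse the initialised structures
}

int main() {
  ScaleData d;
  for (int jxi = 1; jxi <= 3; ++jxi) run_scale(d, jxi);
  return 0;
}

Keeping the flag inside the iteration data, rather than deriving it from the loop index as before, is what lets every wavelength go through the same code path while the setup still happens exactly once.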
+51 −42
@@ -278,50 +278,50 @@ void inclusion(const string& config_file, const string& data_file, const string&
      }
      
      // do the first iteration on jxi488 separately, since it seems to be different from the others
      int jxi488 = 1;
      int jxi488;
      int initialmaxrefiters = cid->maxrefiters;
      
      chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
      nvtxRangePush("First iteration");
#endif
//       chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
// #ifdef USE_NVTX
//       nvtxRangePush("First iteration");
// #endif
      // use these pragmas, which should have no effect on parallelism, just to push the OMP nesting to the same level in the first wavelength iteration as in the others
      int jer = 0;
#pragma omp parallel
      {
#pragma omp single
	{
	  jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
	}
      }
#ifdef USE_NVTX
      nvtxRangePop();
#endif
      chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
      elapsed = start_iter_1 - t_start;
      string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
      logger->log(message);
      time_logger->log(message);
      elapsed = end_iter_1 - start_iter_1;
      message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
      logger->log(message);
      time_logger->log(message);
// #pragma omp parallel
//       {
// #pragma omp single
// 	{
// 	  jer = inclusion_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
// 	}
//       }
// #ifdef USE_NVTX
//       nvtxRangePop();
// #endif
//       chrono::time_point<chrono::high_resolution_clock> end_iter_1 = chrono::high_resolution_clock::now();
//       elapsed = start_iter_1 - t_start;
//       string message = "INFO: Calculation setup took " + to_string(elapsed.count()) + "s.\n";
//       logger->log(message);
//       time_logger->log(message);
//       elapsed = end_iter_1 - start_iter_1;
//       message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
//       logger->log(message);
//       time_logger->log(message);
      /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
      cid->refinemode = 0;
      /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
      // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
      if (jer != 0) {
	// First loop failed. Halt the calculation.
	fclose(timing_file);
	delete time_logger;
	delete p_output;
	delete p_scattering_angles;
	delete cid;
	delete logger;
	delete sconf;
	delete gconf;
	return;
      }
      // cid->refinemode = 0;
      // /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
      // // if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
      // if (jer != 0) {
      // 	// First loop failed. Halt the calculation.
      // 	fclose(timing_file);
      // 	delete time_logger;
      // 	delete p_output;
      // 	delete p_scattering_angles;
      // 	delete cid;
      // 	delete logger;
      // 	delete sconf;
      // 	delete gconf;
      // 	return;
      // }

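The pragma wrapper around the single call deserves a note: #pragma omp parallel followed by #pragma omp single lets one thread execute the call, but from inside a parallel region, so the callee observes the same OMP nesting depth as calls made from the main wavelength loop. A standalone sketch of the effect (not from this codebase; build with e.g. g++ -fopenmp):

#include <cstdio>
#include <omp.h>

void cycle() {
  // omp_get_level() reports how many enclosing parallel regions are active,
  // so it shows the nesting depth the callee sees from each call site.
  std::printf("active OMP levels at call site: %d\n", omp_get_level());
}

int main() {
  // Direct call: level 0.
  cycle();
  // Wrapped call: still executed by one thread, but inside one parallel
  // region, matching the depth of calls made from the main wavelength loop.
#pragma omp parallel
  {
#pragma omp single
    cycle();
  }
  return 0;
}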
      //==================================================
      // do the first outputs here, so that the new files are opened here; afterwards I only append
@@ -415,7 +415,7 @@ void inclusion(const string& config_file, const string& data_file, const string&
	}
#pragma omp barrier
	// ok, now I can actually start the parallel calculations
	for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
	for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
	  // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	  int myjxi488 = ixi488+myompthread;
@@ -595,7 +595,7 @@ void inclusion(const string& config_file, const string& data_file, const string&
      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
#pragma omp barrier
      // ok, now I can actually start the parallel calculations
      for (int ixi488=2; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
      for (int ixi488=1; ixi488<=cid_2->number_of_scales; ixi488 +=myMPIstride) {
	// the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
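With the start index moved from 2 to 1, the first wavelength is distributed like any other: ranks advance in steps of myMPIstride and each OpenMP thread adds its own offset within the block. A sketch of how the indices interleave, with assumed process and thread counts (the real stride and offsets come from the MPI/OMP setup above):

#include <cstdio>

int main() {
  const int number_of_scales = 10;
  const int mpi_procs = 2, omp_threads = 2;            // assumed counts
  const int myMPIstride = mpi_procs * omp_threads;     // indices per outer step
  for (int rank = 0; rank < mpi_procs; ++rank) {
    int myjxi488startoffset = rank * omp_threads;      // this rank's offset
    for (int thread = 0; thread < omp_threads; ++thread) {
      // Starting at 1 now covers the first wavelength like any other.
      for (int ixi488 = 1; ixi488 <= number_of_scales; ixi488 += myMPIstride) {
        int myjxi488 = ixi488 + myjxi488startoffset + thread;
        if (myjxi488 <= number_of_scales)
          std::printf("rank %d thread %d -> scale %d\n", rank, thread, myjxi488);
      }
    }
  }
  return 0;
}

Each scale from 1 to number_of_scales is visited exactly once across all ranks and threads, which is why no separate first-iteration block is needed any more.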
@@ -685,7 +685,9 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
  int jer = 0;
  int lcalc = 0;
  int jaw = 1;
  bool is_first_scale = (jxi488 == 1);
  // this is now part of cid, so don't modify it here; just take a reference to it
  bool &is_first_scale = cid->is_first_scale;
  //bool is_first_scale = (jxi488 == 1);
  // int li = cid->c1->li;
  // int le = cid->c1->le;
  // int lm = cid->c1->lm;
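The reference binding keeps the old local name while making the end-of-iteration write persist in the shared iteration data. A minimal sketch of that aliasing, with a hypothetical Cid stand-in:

#include <cassert>

struct Cid { bool is_first_scale = true; };  // hypothetical stand-in for cid

void cycle(Cid *cid) {
  // The local name aliases the member, so the assignment at the end of the
  // iteration persists into the next call on the same object.
  bool &is_first_scale = cid->is_first_scale;
  is_first_scale = false;
}

int main() {
  Cid c;
  cycle(&c);
  assert(!c.is_first_scale);  // the write went through the reference
  return 0;
}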
@@ -1414,6 +1416,8 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
    } // jph484 loop
    th += sa->thstp;
  } // jth486 loop
  // at the end of this iteration, set is_first_scale to false; the next iteration will reinitialise _only_ if the order changes
  is_first_scale = false;
#ifdef USE_NVTX
  nvtxRangePop();
#endif
@@ -1534,6 +1538,8 @@ InclusionIterationData::InclusionIterationData(GeometryConfiguration *gconf, Sca
  proc_device = 0;
#endif

  // the first time the object is used, it will be a "first iteration", to ensure data structures are properly initialised.
  is_first_scale = true;
  // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
  refinemode = 2;
  // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
@@ -1685,6 +1691,7 @@ InclusionIterationData::InclusionIterationData(const InclusionIterationData& rhs
  extr = rhs.extr;
  
  proc_device = rhs.proc_device;
  is_first_scale = rhs.is_first_scale;
  refinemode = rhs.refinemode;
  maxrefiters = rhs.maxrefiters;
  accuracygoal = rhs.accuracygoal;
@@ -1823,6 +1830,7 @@ InclusionIterationData::InclusionIterationData(const mixMPI *mpidata, const int
  proc_device = 0;
#endif
  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
}
@@ -1888,6 +1896,7 @@ void InclusionIterationData::mpibcast(const mixMPI *mpidata) {
  MPI_Bcast(&nimd, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&extr, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Bcast(&refinemode, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
  MPI_Bcast(&maxrefiters, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&accuracygoal, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
}
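MPI_C_BOOL is the MPI datatype matching C/C++ bool, so the new broadcasts keep every rank's copy of the flag in sync with rank 0. A self-contained sketch of the call (file name and build line assumed):

// Build with e.g.: mpicxx bcast_flag.cpp && mpirun -n 2 ./a.out
#include <cstdio>
#include <mpi.h>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  // Rank 0's value is propagated to every rank; after the call all copies
  // of the flag agree, which is what the iteration data broadcasts rely on.
  bool is_first_scale = (rank == 0);  // only meaningful on the root
  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
  std::printf("rank %d: is_first_scale = %d\n", rank, is_first_scale);
  MPI_Finalize();
  return 0;
}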
+32 −21
@@ -201,25 +201,25 @@ void sphere(const string& config_file, const string& data_file, const string& ou
      }

      // Do the first wavelength iteration
      int jxi488 = 1;
      // int jxi488 = 1;
      // Use pragmas to push OMP parallelism to the second level.
      int jer = 0;
#pragma omp parallel
      {
#pragma omp single
	{
	  jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp);
	} // OMP single
      } // OMP parallel
      if (jer != 0) { // First iteration failed. Halt the calculation.
	delete p_output;
	delete p_sa;
	delete sid;
	delete logger;
	delete sconf;
	delete gconf;
	return;
      }
// #pragma omp parallel
//       {
// #pragma omp single
// 	{
// 	  jer = sphere_jxi488_cycle(jxi488 - 1, sconf, gconf, p_sa, sid, p_output, output_path, vtppoanp);
// 	} // OMP single
//       } // OMP parallel
//       if (jer != 0) { // First iteration failed. Halt the calculation.
// 	delete p_output;
// 	delete p_sa;
// 	delete sid;
// 	delete logger;
// 	delete sconf;
// 	delete gconf;
// 	return;
//       }

      //==================================================
      // do the first outputs here, so that the new files are opened here; afterwards I only append
@@ -310,7 +310,7 @@ void sphere(const string& config_file, const string& data_file, const string& ou
	}
#pragma omp barrier
	// ok, now I can actually start the parallel calculations
	for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
	for (int ixi488=1; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
	  // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	  int myjxi488 = ixi488+myompthread;
@@ -463,7 +463,7 @@ void sphere(const string& config_file, const string& data_file, const string& ou
      // make sure all threads align here: I don't want the following loop to accidentally start for thread 0, possibly modifying some variables before they are copied by all other threads
#pragma omp barrier
      // ok, now I can actually start the parallel calculations
      for (int ixi488=2; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
      for (int ixi488=1; ixi488<=sid_2->number_of_scales; ixi488 +=myMPIstride) {
	// the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
@@ -546,7 +546,9 @@ int sphere_jxi488_cycle(
  int oindex = 0;
  int jxi = jxi488 + 1;
  int jxindex = jxi - oi->first_xi;
  bool is_first_scale = (jxi == 1);
  // this is now part of sid, so don't modify it here; just take a reference to it
  bool &is_first_scale = sid->is_first_scale;
  //bool is_first_scale = (jxi == 1);
  int nsph = gconf->number_of_spheres;
  //int l_max = gconf->l_max;
  int in_pol = gconf->in_pol;
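The sphere cycle keeps three related indices: the 0-based argument jxi488, the 1-based scale number jxi, and jxindex, the slot inside the output chunk that starts at first_xi. A small sketch of the mapping, with an assumed first_xi:

#include <cstdio>

int main() {
  const int first_xi = 1;  // assumed: this output chunk starts at scale 1
  for (int jxi488 = 0; jxi488 < 3; ++jxi488) {
    int jxi = jxi488 + 1;          // 1-based scale number, as in the cycle above
    int jxindex = jxi - first_xi;  // slot inside the output chunk
    std::printf("jxi488=%d -> scale jxi=%d, output slot %d\n", jxi488, jxi, jxindex);
  }
  return 0;
}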
@@ -889,6 +891,8 @@ int sphere_jxi488_cycle(
    } // jph484 loop on elevation
    th += sa->thstp;
  } // jth486 loop on azimuth
  // at the end of this iteration, set is_first_scale to false; the next iteration will reinitialise _only_ if the order changes
  is_first_scale = false;
  oi->vec_jxi[jxindex] = jxi;
  logger->log("INFO: finished scale iteration " + to_string(jxi) + " of " + to_string(nxi) + ".\n");
  delete logger;
@@ -977,6 +981,7 @@ SphereIterationData::SphereIterationData(
      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
    }
  }
  is_first_scale = true;
}

SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
@@ -1082,6 +1087,7 @@ SphereIterationData::SphereIterationData(const SphereIterationData &rhs) {
      zpv[zi][zj][1] = (vec_zpv + vec_index + 2);
    }
  }
  is_first_scale = rhs.is_first_scale;
}

SphereIterationData::~SphereIterationData() {
@@ -1207,6 +1213,9 @@ SphereIterationData::SphereIterationData(const mixMPI *mpidata, const int device
  // Collect vectors whose size depend on LM
  vec_zpv = new double[12 * _lm];
  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);

}

int SphereIterationData::mpibcast(const mixMPI *mpidata) {
@@ -1270,6 +1279,8 @@ int SphereIterationData::mpibcast(const mixMPI *mpidata) {
  // Broadcast vectors whose size depend on LM
  MPI_Bcast(vec_zpv, 12 * _lm, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  MPI_Bcast(&is_first_scale, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);

  return 0;
}
#endif // MPI_VERSION