Commit e2e95b84 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Adapt np_cluster to use ClusterOutputInfo with MPI

parent 64e4ba86
Loading
Loading
Loading
Loading
+75 −68
Original line number Diff line number Diff line
@@ -229,7 +229,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
      // Shortcuts to variables stored in configuration objects
      ScatteringAngles *p_scattering_angles = new ScatteringAngles(gconf);
      double wp = sconf->wp;
      // Open empty virtual ascii file for output
      // ClusterOutputInfo : Thread 0 of MPI process 0 allocates the memory to
      // store all the results.
      ClusterOutputInfo *p_output = new ClusterOutputInfo(sconf, gconf, mpidata);
      // Create and initialise pristine cid for MPI proc 0 and thread 0
      ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata, device_count);
@@ -480,8 +481,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
	      }
	    }
#endif
	    p_outarray[0]->write(output_path + "/c_OCLU", "LEGACY");
	    delete p_outarray[0];
	    // ClusterOutputInfo : the VirtualAsciiFile instances were appended to
	    // disk here. This is no longer the case.
	    // p_outarray[0]->write(output_path + "/c_OCLU", "LEGACY");
	    // delete p_outarray[0];
	  }
	  // end block writing to disk
#ifdef USE_NVTX
@@ -506,7 +509,9 @@ void cluster(const string& config_file, const string& data_file, const string& o
	delete cid_2;
      }
      delete p_scattering_angles;
    }    
      p_output->write(output_path + "/c_OCLU", "LEGACY");
      delete p_output;
    } // closes s_nsph == nsph check

    else { // NSPH mismatch between geometry and scatterer configurations.
      throw UnrecognizedConfigurationException(
@@ -579,9 +584,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
      // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
      if (myompthread == 0) {
	cid_2 = cid;
	// Thread 0 of non-zero MPI processes needs to allocate memory for the
	// output of all threads.
	p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488startoffset, myMPIstride);
      } else {
	// this is not thread 0, so do create fresh copies of all local variables
	cid_2 = new ClusterIterationData(*cid);
@@ -593,9 +595,13 @@ void cluster(const string& config_file, const string& data_file, const string& o
	// the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
	// each thread opens new virtual files and stores their pointers in the shared array
	if (myompthread > 0)
	// Thread 0 of non-zero MPI processes needs to allocate memory for the
	// output of all threads.
	if (myompthread == 0)
	  p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
	else
	  p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	// each thread opens new virtual files and stores their pointers in the shared array
	vtppoanp_2 = new VirtualBinaryFile();
	// each thread puts a copy of the pointers to its virtual files in the shared arrays
	p_outarray[myompthread] = p_output_2;
@@ -685,6 +691,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
  int num_configs = sconf->configurations;
  int ndirs = sa->nkks;
  int oindex = -1;
  int jindex = jxi488 - output->first_xi + 1;

#ifdef USE_NVTX
  nvtxRangePush("Prepare matrix calculation");
@@ -697,18 +704,18 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
  if (idfc >= 0) {
    cid->vk = xi * cid->wn;
    vkarg = cid->vk;
    output->vec_vk[jxi488 - 1] = cid->vk;
    output->vec_xi[jxi488 - 1] = xi;
    output->vec_vk[jindex - 1] = cid->vk;
    output->vec_xi[jindex - 1] = xi;
  } else {
    vkarg = xi * cid->vk;
    cid->sqsfi = 1.0 / (xi * xi);
    output->vec_vk[jxi488 - 1] = cid->vk;
    output->vec_xi[jxi488 - 1] = xi;
    output->vec_vk[jindex - 1] = cid->vk;
    output->vec_xi[jindex - 1] = xi;
  }
  hjv(exri, vkarg, jer, lcalc, cid->arg, cid->c1);
  if (jer != 0) {
    output->vec_ier[jxi488 - 1] = 1;
    output->vec_jxi[jxi488 - 1] = -jxi488;
    output->vec_ier[jindex - 1] = 1;
    output->vec_jxi[jindex - 1] = -jxi488;
    return jer;
    // break; // rewrite this to go to the end of the function, to free locally allocated variables and return jer
  }
@@ -739,14 +746,14 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
	  cid->c1, jer, lcalc, cid->arg, last_configuration
	  );
      if (jer != 0) {
	output->vec_ier[jxi488 - 1] = 2;
	output->vec_jxi[jxi488 - 1] = -jxi488;
	output->vec_ier[jindex - 1] = 2;
	output->vec_jxi[jindex - 1] = -jxi488;
	return jer;
	//break;
      }
    }
    if (jer != 0) {
      output->vec_jxi[jxi488 - 1] = -jxi488;
      output->vec_jxi[jindex - 1] = -jxi488;
      return jer;
      //break;
    }
@@ -877,7 +884,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
    if (cid->c1->iog[i170 - 1] >= i170) {
      int i = i170 - 1;
      last_configuration++;
      oindex = (jxi488 - 1) * num_configs + last_configuration - 1;
      oindex = (jindex - 1) * num_configs + last_configuration - 1;
      double albeds = cid->c1->sscs[i] / cid->c1->sexs[i];
      cid->c1->sqscs[i] *= cid->sqsfi;
      cid->c1->sqabs[i] *= cid->sqsfi;
@@ -916,15 +923,15 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
      output->vec_tqsk2[oindex] = cid->tqss[1][i];
    }
  } // i170 loop
  output->vec_fsat[jxi488 - 1] = cid->c1->tfsas;
  output->vec_fsat[jindex - 1] = cid->c1->tfsas;
  csch = 2.0 * cid->vk * cid->sqsfi / cid->c1->gcs;
  s0 = cid->c1->tfsas * exri;
  qschu = imag(s0) * csch;
  pschu = real(s0) * csch;
  s0mag = cabs(s0) * cs0;
  output->vec_qschut[jxi488 - 1] = qschu;
  output->vec_pschut[jxi488 - 1] = pschu;
  output->vec_s0magt[jxi488 - 1] = s0mag;
  output->vec_qschut[jindex - 1] = qschu;
  output->vec_pschut[jindex - 1] = pschu;
  output->vec_s0magt[jindex - 1] = s0mag;
  vtppoanp->append_line(VirtualBinaryLine(cid->vk));
  pcrsm0(cid->vk, exri, inpol, cid->c1);
  apcra(cid->zpv, cid->c1->le, cid->c1->am0m, inpol, sqk, cid->gapm, cid->gappm);
@@ -1097,54 +1104,54 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
	      // }
	      // label 208
	      if (ipol == -1) {
		output->vec_scc1[jxi488 - 1] = scasm;
		output->vec_abc1[jxi488 - 1] = abssm;
		output->vec_exc1[jxi488 - 1] = extsm;
		output->vec_albedc1[jxi488 - 1] = albdm;
		output->vec_qscamc1[jxi488 - 1] = qscam;
		output->vec_qabsmc1[jxi488 - 1] = qabsm;
		output->vec_qextmc1[jxi488 - 1] = qextm;
		output->vec_sccrt1[jxi488 - 1] = scarm;
		output->vec_abcrt1[jxi488 - 1] = absrm;
		output->vec_excrt1[jxi488 - 1] = extrm;
		output->vec_scc1[jindex - 1] = scasm;
		output->vec_abc1[jindex - 1] = abssm;
		output->vec_exc1[jindex - 1] = extsm;
		output->vec_albedc1[jindex - 1] = albdm;
		output->vec_qscamc1[jindex - 1] = qscam;
		output->vec_qabsmc1[jindex - 1] = qabsm;
		output->vec_qextmc1[jindex - 1] = qextm;
		output->vec_sccrt1[jindex - 1] = scarm;
		output->vec_abcrt1[jindex - 1] = absrm;
		output->vec_excrt1[jindex - 1] = extrm;
		// Following values are written after FSAC
		output->vec_qschuc1[jxi488 - 1] = qschum;
		output->vec_pschuc1[jxi488 - 1] = pschum;
		output->vec_s0magc1[jxi488 - 1] = s0magm;
		output->vec_qschuc1[jindex - 1] = qschum;
		output->vec_pschuc1[jindex - 1] = pschum;
		output->vec_s0magc1[jindex - 1] = s0magm;
	      } else if (ipol == 1) {
		output->vec_scc2[jxi488 - 1] = scasm;
		output->vec_abc2[jxi488 - 1] = abssm;
		output->vec_exc2[jxi488 - 1] = extsm;
		output->vec_albedc2[jxi488 - 1] = albdm;
		output->vec_qscamc2[jxi488 - 1] = qscam;
		output->vec_qabsmc2[jxi488 - 1] = qabsm;
		output->vec_qextmc2[jxi488 - 1] = qextm;
		output->vec_sccrt2[jxi488 - 1] = scarm;
		output->vec_abcrt2[jxi488 - 1] = absrm;
		output->vec_excrt2[jxi488 - 1] = extrm;
		output->vec_scc2[jindex - 1] = scasm;
		output->vec_abc2[jindex - 1] = abssm;
		output->vec_exc2[jindex - 1] = extsm;
		output->vec_albedc2[jindex - 1] = albdm;
		output->vec_qscamc2[jindex - 1] = qscam;
		output->vec_qabsmc2[jindex - 1] = qabsm;
		output->vec_qextmc2[jindex - 1] = qextm;
		output->vec_sccrt2[jindex - 1] = scarm;
		output->vec_abcrt2[jindex - 1] = absrm;
		output->vec_excrt2[jindex - 1] = extrm;
		// Following values are written after FSAC
		output->vec_qschuc2[jxi488 - 1] = qschum;
		output->vec_pschuc2[jxi488 - 1] = pschum;
		output->vec_s0magc2[jxi488 - 1] = s0magm;
		output->vec_qschuc2[jindex - 1] = qschum;
		output->vec_pschuc2[jindex - 1] = pschum;
		output->vec_s0magc2[jindex - 1] = s0magm;
	      }
	      if (ilr210 == 1 && jlr == 2) {
		output->vec_fsac11[jxi488 - 1] = cid->c1->fsacm[0][0];
		output->vec_fsac21[jxi488 - 1] = cid->c1->fsacm[1][0];
		output->vec_fsac11[jindex - 1] = cid->c1->fsacm[0][0];
		output->vec_fsac21[jindex - 1] = cid->c1->fsacm[1][0];
	      } else if (ilr210 == 2 && jlr == 1) {
		output->vec_fsac22[jxi488 - 1] = cid->c1->fsacm[1][1];
		output->vec_fsac12[jxi488 - 1] = cid->c1->fsacm[0][1];
		output->vec_fsac22[jindex - 1] = cid->c1->fsacm[1][1];
		output->vec_fsac12[jindex - 1] = cid->c1->fsacm[0][1];
	      }
	      double rapr = cid->c1->ecscm[ilr210 - 1] - cid->gapm[2][ilr210 - 1];
	      double cosav = cid->gapm[2][ilr210 - 1] / cid->c1->scscm[ilr210 - 1];
	      double fz = rapr;
	      if (ilr210 == 1) {
		output->vec_cosavc1[jxi488 - 1] = cosav;
		output->vec_raprc1[jxi488 - 1] = rapr;
		output->vec_fkc1[jxi488 - 1] = fz;
		output->vec_cosavc1[jindex - 1] = cosav;
		output->vec_raprc1[jindex - 1] = rapr;
		output->vec_fkc1[jindex - 1] = fz;
	      } else if (ilr210 == 2) {
		output->vec_cosavc2[jxi488 - 1] = cosav;
		output->vec_raprc2[jxi488 - 1] = rapr;
		output->vec_fkc2[jxi488 - 1] = fz;
		output->vec_cosavc2[jindex - 1] = cosav;
		output->vec_raprc2[jindex - 1] = rapr;
		output->vec_fkc2[jindex - 1] = fz;
	      }
	    } // ilr210 loop
	  }
@@ -1174,7 +1181,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
	  last_configuration = 0;
	  for (int i226 = 1; i226 <= nsph; i226++) {
	    if (cid->c1->iog[i226 - 1] >= i226) {
	      oindex = (jxi488 - 1) * num_configs * ndirs + num_configs * done_dirs + last_configuration;
	      oindex = (jindex - 1) * num_configs * ndirs + num_configs * done_dirs + last_configuration;
	      last_configuration++;
	      output->vec_dir_sas11[oindex] = cid->c1->sas[i226 - 1][0][0];
	      output->vec_dir_sas21[oindex] = cid->c1->sas[i226 - 1][1][0];
@@ -1185,14 +1192,14 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
	      } // j225 loop
	      mmulc(cid->c1->vint, cid->cmullr, cid->cmul);
	      for (int i1 = 0; i1 < 4; i1++) {
		oindex = 16 * (jxi488 - 1) * num_configs * ndirs + 16 * num_configs * done_dirs + 16 * (last_configuration - 1) + 4 * i1;
		oindex = 16 * (jindex - 1) * num_configs * ndirs + 16 * num_configs * done_dirs + 16 * (last_configuration - 1) + 4 * i1;
		output->vec_dir_muls[oindex] = cid->cmul[i1][0];
		output->vec_dir_muls[oindex + 1] = cid->cmul[i1][1];
		output->vec_dir_muls[oindex + 2] = cid->cmul[i1][2];
		output->vec_dir_muls[oindex + 3] = cid->cmul[i1][3];
	      } // i1 loop
	      for (int i1 = 0; i1 < 4; i1++) {
		oindex = 16 * (jxi488 - 1) * num_configs * ndirs + 16 * num_configs * done_dirs + 16 * (last_configuration - 1) + 4 * i1;
		oindex = 16 * (jindex - 1) * num_configs * ndirs + 16 * num_configs * done_dirs + 16 * (last_configuration - 1) + 4 * i1;
		output->vec_dir_mulslr[oindex] = cid->cmullr[i1][0];
		output->vec_dir_mulslr[oindex + 1] = cid->cmullr[i1][1];
		output->vec_dir_mulslr[oindex + 2] = cid->cmullr[i1][2];
@@ -1200,7 +1207,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
	      } // i1 loop
	    }
	  } // i226 loop
	  oindex = (jxi488 - 1) * ndirs + done_dirs;
	  oindex = (jindex - 1) * ndirs + done_dirs;
	  output->vec_dir_sat11[oindex] = cid->c1->tsas[0][0];
	  output->vec_dir_sat21[oindex] = cid->c1->tsas[1][0];
	  output->vec_dir_sat12[oindex] = cid->c1->tsas[0][1];
@@ -1431,14 +1438,14 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
	    }
	  } //ilr290 loop
	  for (int i = 0; i < 4; i++) {
	    oindex = 16 * (jxi488 - 1) * ndirs + 16 * done_dirs + 4 * i;
	    oindex = 16 * (jindex - 1) * ndirs + 16 * done_dirs + 4 * i;
	    output->vec_dir_mulc[oindex] = cid->cmul[i][0];
	    output->vec_dir_mulc[oindex + 1] = cid->cmul[i][1];
	    output->vec_dir_mulc[oindex + 2] = cid->cmul[i][2];
	    output->vec_dir_mulc[oindex + 3] = cid->cmul[i][3];
	  }
	  for (int i = 0; i < 4; i++) {
	    oindex = 16 * (jxi488 - 1) * ndirs + 16 * done_dirs + 4 * i;
	    oindex = 16 * (jindex - 1) * ndirs + 16 * done_dirs + 4 * i;
	    output->vec_dir_mulclr[oindex] = cid->cmullr[i][0];
	    output->vec_dir_mulclr[oindex + 1] = cid->cmullr[i][1];
	    output->vec_dir_mulclr[oindex + 2] = cid->cmullr[i][2];
@@ -1465,14 +1472,14 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
	    }
	    // label 318
	    for (int i = 0; i < 4; i++) {
	      oindex = 16 * (jxi488 - 1) + 4 * i;
	      oindex = 16 * (jindex - 1) + 4 * i;
	      output->vec_dir_mulc[oindex] = cid->cmul[i][0];
	      output->vec_dir_mulc[oindex + 1] = cid->cmul[i][1];
	      output->vec_dir_mulc[oindex + 2] = cid->cmul[i][2];
	      output->vec_dir_mulc[oindex + 3] = cid->cmul[i][3];
	    }
	    for (int i = 0; i < 4; i++) {
	      oindex = 16 * (jxi488 - 1) + 4 * i;
	      oindex = 16 * (jindex - 1) + 4 * i;
	      output->vec_dir_mulclr[oindex] = cid->cmullr[i][0];
	      output->vec_dir_mulclr[oindex + 1] = cid->cmullr[i][1];
	      output->vec_dir_mulclr[oindex + 2] = cid->cmullr[i][2];
@@ -1492,7 +1499,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  if (jer == 0) output->vec_jxi[jxi488 - 1] = jxi488;
  if (jer == 0) output->vec_jxi[jindex - 1] = jxi488;
  interval_end = chrono::high_resolution_clock::now();
  elapsed = interval_end - interval_start;
  message = "INFO: angle loop for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
+6 −0
Original line number Diff line number Diff line
@@ -35,6 +35,12 @@
#include <cstdio>
#include <string>

#ifdef USE_MPI
#ifndef MPI_VERSION
#include <mpi.h>
#endif
#endif

#ifndef INCLUDE_TYPES_H_
#include "../include/types.h"
#endif
+5 −3
Original line number Diff line number Diff line
@@ -67,6 +67,8 @@ protected:
  int write_legacy(const std::string &output);
  
public:
  //! \brief Read-only view on the ID of the first scale
  const int &first_xi = _first_xi;
  //! \brief Number of spheres in the aggregate.
  int nsph;
  //! \brief Maximum internal field expansion order.
@@ -459,12 +461,12 @@ public:
   * \param sc: `ScattererConfiguration *` Pointer to a `ScattererConfiguration` instance.
   * \param gc: `GeometryConfiguration *` Pointer to a `GeometryConfiguration` instance.
   * \param mpidata: `const mixMPI*` Pointer to a mixMPI instance.
   * \param first_xi: `int` Index of the first scale in output (optional, default is 0).
   * \param xi_length: `int` Number of scales tobe included in output (optional, default is all).
   * \param first_xi: `int` Index of the first scale in output (optional, default is 1).
   * \param xi_length: `int` Number of scales tobe included in output (optional, default is 0, meaning all).
   */   
  ClusterOutputInfo(
    ScattererConfiguration *sc, GeometryConfiguration *gc,
    const mixMPI *mpidata, int first_xi = 0, int xi_length = 0
    const mixMPI *mpidata, int first_xi = 1, int xi_length = 0
  );

  /*! \brief `ClusterOutputInfo` instance destroyer.
+171 −169

File changed.

Preview size limit exceeded, changes collapsed.