Commit c535f591 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Merge branch 'profile_trapping' into script_devel

parents 96cc3945 714bf8d8
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -826,7 +826,7 @@ else
fi
# End of offload checks
# Assemble the default C++ compiler flags only when CXXFLAGS is empty or unset;
# a value already present in CXXFLAGS is left untouched.
# NOTE(review): the two assignments below look like the before/after lines of
# the same diff hunk; as written, the second one (which appends NVTXFLAGS) is
# the value that remains in effect - confirm against the actual script.
if [ "x$CXXFLAGS" = "x" ]; then
    CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}"
    CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}${NVTXFLAGS}"
fi
if [ "x$CXXLDFLAGS" = "x" ]; then
    if [ "x$LIBMODE" = "xstatic" ]; then
+3 −6
Original line number Diff line number Diff line
@@ -63,6 +63,9 @@ protected:
  void write_legacy(const std::string& file_name);

public:
  //! \brief Read-only view of the WK vector.
  const dcomplex *vec_wk;
  
  /*! \brief Swap1 instance constructor.
   *
   * \param lm: `int` Maximum field expansion order.
@@ -97,12 +100,6 @@ public:
   */
  static long get_memory_requirement(int lm, int _nkv);
  
  /*! \brief Get the pointer to the WK vector.
   *
   * The returned address is the `wk` member itself, not a copy, so writes
   * made through it modify the data held by this instance.
   *
   * \return value: `dcomplex *` Memory address of the WK vector.
   */
  dcomplex *get_vector() { return wk; }

  /*! \brief Move the next-element position back to the start of the vector.
   *
   * Only the `last_index` counter is set to zero; no other member is touched.
   */
  void reset() { last_index = 0; }
+5 −8
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) {
  nlmmt = 2 * lm * (lm + 2);
  const int size = nkv * nkv * nlmmt;
  wk = new dcomplex[size]();
  vec_wk = wk;
  last_index = 0;
}

@@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) {
  string str_type;
  int _nlmmt, _nkv, lm, num_elements, index;
  dcomplex value;
  dcomplex *_wk = NULL;
  if (status == 0) {
    status = hdf_file->read("NLMMT", "INT32", &_nlmmt);
    status = hdf_file->read("NKV", "INT32", &_nkv);
    lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0);
    lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1;
    num_elements = 2 * _nlmmt * _nkv * _nkv;
    instance = new Swap1(lm, _nkv);
    _wk = instance->get_vector();
    elements = new double[num_elements]();
    str_type = "FLOAT64_(" + to_string(num_elements) + ")";
    status = hdf_file->read("WK", str_type, elements);
    for (int wi = 0; wi < num_elements / 2; wi++) {
      index = 2 * wi;
      value = elements[index] + elements[index + 1] * I;
      _wk[wi] = value;
      instance->wk[wi] = value;
    } // wi loop
    delete[] elements;
    status = hdf_file->close();
@@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) {
Swap1* Swap1::from_legacy(const std::string& file_name) {
  fstream input;
  Swap1 *instance = NULL;
  dcomplex *_wk = NULL;
  int _nlmmt, _nkv, lm;
  double rval, ival;
  input.open(file_name.c_str(), ios::in | ios::binary);
  if (input.is_open()) {
    input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int));
    lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0);
    lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1;
    input.read(reinterpret_cast<char *>(&_nkv), sizeof(int));
    instance = new Swap1(lm, _nkv);
    _wk = instance->get_vector();
    int num_elements = _nlmmt * _nkv * _nkv;
    for (int j = 0; j < num_elements; j++) {
      input.read(reinterpret_cast<char *>(&rval), sizeof(double));
      input.read(reinterpret_cast<char *>(&ival), sizeof(double));
      _wk[j] = rval + ival * I;
      instance->wk[j] = rval + ival * I;
    }
    input.close();
  } else {
+6 −0
Original line number Diff line number Diff line
@@ -576,6 +576,12 @@ int sphere_jxi488_cycle(
    oi->vec_vk[jxindex] = vk;
    oi->vec_xi[jxindex] = xi;
  }
  // Adaptive definition of L_MAX
  double wavelength = 2.0 * pi / vk;
  double size_param = 2.0 * pi * sconf->get_radius(0) / wavelength;
  int N = int(size_param + 4.05 * pow(size_param, 1.0 / 3.0)) + 2;
  if (N < l_max) l_max = N;
  // End of adaptive definition of L_MAX
  vtppoanp->append_line(VirtualBinaryLine(vk));
  double thsca = (gconf->isam > 1) ? sa->ths - sa->th : 0.0;
  for (int i132 = 0; i132 < nsph; i132++) {
+77 −15
Original line number Diff line number Diff line
@@ -56,6 +56,10 @@
#include "../include/tra_subs.h"
#endif

#ifdef USE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif

using namespace std;

/*! \brief C++ implementation of FRFME
@@ -64,13 +68,15 @@ using namespace std;
 *  \param output_path: `string` Directory to write the output files in.
 */
void frfme(string data_file, string output_path) {
#ifdef USE_NVTX
  nvtxRangePush("Running frfme()");
#endif
  string tfrfme_name = output_path + "/c_TFRFME.hd5";
  TFRFME *tfrfme = NULL;
  Swap1 *tt1 = NULL;
  Swap2 *tt2 = NULL;
  char namef[7];
  char more;
  dcomplex **w = NULL;
  dcomplex *wk = NULL;
  const dcomplex cc0 = 0.0 + 0.0 * I;
  const dcomplex uim = 0.0 + 1.0 * I;
@@ -98,6 +104,9 @@ void frfme(string data_file, string output_path) {
  int wsum_size;
  // End of vector size variables
  if (jlmf != 1) {
#ifdef USE_NVTX
    nvtxRangePush("frfme() with jlmf != 1");
#endif
    int nxv, nyv, nzv;
    if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5");
    if (tfrfme != NULL) {
@@ -140,7 +149,16 @@ void frfme(string data_file, string output_path) {
      printf("ERROR: could not open TFRFME file.\n");
    }
    nks = nkv - 1;
  } else { // label 16
#ifdef USE_NVTX
    nvtxRangePop();
#endif
  } else { // label 16; jlmf == 1
#ifdef USE_NVTX
    nvtxRangePush("frfme() with jlmf == 1");
#endif
#ifdef USE_NVTX
    nvtxRangePush("Setup operations");
#endif
    int nksh, nrsh, nxsh, nysh, nzsh;
    str_target = file_lines[last_read_line++];
    for (int cli = 0; cli < 7; cli++) {
@@ -176,6 +194,9 @@ void frfme(string data_file, string output_path) {
    }
    str_target = file_lines[last_read_line++];
    re = regex("[eEmM]");
#ifdef USE_NVTX
    nvtxRangePop();
#endif
    if (regex_search(str_target, m, re)) {
      more = m.str().at(0);
      if (more == 'm' || more == 'M') {
@@ -193,6 +214,9 @@ void frfme(string data_file, string output_path) {
      string tedf_name = output_path + "/" + namef + ".hd5";
      ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5");
      if (tedf != NULL) {
#ifdef USE_NVTX
	nvtxRangePush("TEDF data import");
#endif
	int iduml, idum;
	iduml = tedf->number_of_spheres;
	idum = tedf->get_iog(iduml - 1);
@@ -216,6 +240,9 @@ void frfme(string data_file, string output_path) {
	  xi = xip;
	}
	// label 20
#ifdef USE_NVTX
	nvtxRangePop();
#endif
	delete tedf;
	double wn = wp / 3.0e8;
	vk = xi * wn;
@@ -236,6 +263,9 @@ void frfme(string data_file, string output_path) {
	  fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy);
	}
	// label 22
#ifdef USE_NVTX
	nvtxRangePush("Memory data loading");
#endif
	nlmmt = lm * (lm + 2) * 2;
	nks = nksh * 2;
	nkv = nks + 1;
@@ -279,6 +309,12 @@ void frfme(string data_file, string output_path) {
	double *_yv = tfrfme->get_y();
	double *_zv = tfrfme->get_z();
	dcomplex **_wsum = tfrfme->get_matrix();
#ifdef USE_NVTX
	nvtxRangePop();
#endif
#ifdef USE_NVTX
	nvtxRangePush("Looped vector initialization");
#endif
	for (int i24 = nxshpo; i24 <= nxs; i24++) {
	  _xv[i24] = _xv[i24 - 1] + delxyz;
	  _xv[nxv - i24 - 1] = -_xv[i24];
@@ -297,7 +333,13 @@ void frfme(string data_file, string output_path) {
	  vkv[i28] = vkv[i28 - 1] + delk;
	  vkv[nkv - i28 - 1] = -vkv[i28];
	} // i28 loop
#ifdef USE_NVTX
	nvtxRangePop();
#endif
	if (tfrfme != NULL) {
#ifdef USE_NVTX
	  nvtxRangePush("TFRFME initialization");
#endif
	  tfrfme->set_param("vk", vk);
	  tfrfme->set_param("exri", exri);
	  tfrfme->set_param("an", an);
@@ -329,19 +371,20 @@ void frfme(string data_file, string output_path) {
	  tt2->set_param("nlmmt", 1.0 * nlmmt);
	  tt2->set_param("nrvc", 1.0 * nrvc);
	  tt2->write_binary(temp_name2, "HDF5");
#ifdef USE_NVTX
	  nvtxRangePop();
#endif
	  dcomplex *vec_w = new dcomplex[nkv * nkv]();
	  dcomplex **w = new dcomplex*[nkv];
	  for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;
#ifdef USE_NVTX
	  nvtxRangePush("j80 loop");
#endif
	  for (int j80 = jlmf; j80 <= jlml; j80++) {
	    dcomplex *tt1_wk = tt1->get_vector();
	    int wk_index = 0;
	    // w matrix
	    if (w != NULL) {
	      for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi];
	      delete[] w;
	    }
	    w = new dcomplex*[nkv];
	    for (int wi = 0; wi < nkv; wi++) w[wi] = new dcomplex[nkv]();
	    for (int jy50 = 0; jy50 < nkv; jy50++) {
	      for (int jx50 = 0; jx50 < nkv; jx50++) {
		for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1_wk[wk_index++];
		for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1->vec_wk[wk_index++];
		w[jx50][jy50] = wk[j80 - 1];
	      } // jx50
	    } // jy50 loop
@@ -377,7 +420,15 @@ void frfme(string data_file, string output_path) {
	      } // iy70 loop
	    } // iz75 loop
	  } // j80 loop
	  delete[] vec_w;
	  delete[] w;
#ifdef USE_NVTX
	  nvtxRangePop();
#endif
	  // label 88
#ifdef USE_NVTX
	  nvtxRangePush("Closing operations");
#endif
	  tfrfme->write_binary(tfrfme_name, "HDF5");
	  string output_name = output_path + "/c_OFRFME";
	  FILE *output = fopen(output_name.c_str(), "w");
@@ -386,6 +437,9 @@ void frfme(string data_file, string output_path) {
	  if (spd > 0.0) fprintf(output, "  FSHMX =%15.7lE\n", fshmx);
	  fprintf(output, "  FRSH =%15.7lE\n", frsh);
	  fclose(output);
#ifdef USE_NVTX
	  nvtxRangePop();
#endif
	} else { // Should never happen.
	  printf("ERROR: could not open TFRFME file for output.\n");
	}
@@ -398,16 +452,24 @@ void frfme(string data_file, string output_path) {
      fprintf(output, "  WRONG INPUT TAPE\n");
      fclose(output);
    }
#ifdef USE_NVTX
    nvtxRangePop();
#endif
  }
  // label 45
#ifdef USE_NVTX
  nvtxRangePush("frfme() memory clean");
#endif
  if (tfrfme != NULL) delete tfrfme;
  delete[] file_lines;
  if (tt2 != NULL) delete tt2;
  if (w != NULL) {
    for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi];
    delete[] w;
  }
  if (wk != NULL) delete[] wk;
  if (tt1 != NULL) delete tt1;
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  printf("FRFME: Done.\n");
#ifdef USE_NVTX
  nvtxRangePop();
#endif
}
Loading