Commit c1fe34a6 authored by Giacomo Mulas's avatar Giacomo Mulas
Browse files

Merge branch 'optimize_trapping' into 'master'

Optimize trapping

See merge request giacomo.mulas/np_tmcode!99
parents 120a5b3a 25d51758
Loading
Loading
Loading
Loading
+2 −12
Original line number Diff line number Diff line
@@ -132,8 +132,6 @@ protected:
  int _last_matrix;
  //! Number of beam description wave numbers.
  int _nkv;
  //! Contiguous vector of VKZM matrix.
  double *vec_vkzm;
  //! QUESTION: definition?
  double _apfafa;
  //! QUESTION: definition?
@@ -197,7 +195,7 @@ public:
  //! QUESTION: definition?
  double *vkv;
  //! QUESTION: definition?
  double **vkzm;
  double *vec_vkzm;
  //! QUESTION: definition?
  const double &apfafa = _apfafa;
  //! QUESTION: definition?
@@ -244,12 +242,6 @@ public:
   */
  static Swap2* from_binary(const std::string& file_name, const std::string& mode="LEGACY");

  /*! \brief Get the pointer to the VKZM matrix.
   *
   * \return value: `double **` Pointer to the VKZM matrix.
   */
  double **get_matrix() { return vkzm; }

  /*! \brief Calculate the necessary amount of memory to create a new instance.
   *
   * \param nkv: `int` Number of beam description wave numbers.
@@ -350,8 +342,6 @@ protected:
  double *yv;
  //! Vector of computed z positions
  double *zv;
  //! QUESTION: definition?
  dcomplex *vec_wsum;

  /*! \brief Load a configuration instance from a HDF5 binary file.
   *
@@ -415,7 +405,7 @@ public:
  //! QUESTION: definition?
  const double& exril = _exril;
  //! QUESTION: definition?
  dcomplex **wsum;
  dcomplex *vec_wsum;
  
  /*! \brief Trapping configuration instance constructor.
   *
+54 −23
Original line number Diff line number Diff line
@@ -44,6 +44,10 @@
#include "../include/file_io.h"
#endif

#ifdef USE_TARGET_OFFLOAD
#include <cstdlib>
#endif

using namespace std;

// >>> START OF Swap1 CLASS IMPLEMENTATION <<<
@@ -220,18 +224,32 @@ bool Swap1::operator ==(Swap1 &other) {
// >>> START OF Swap2 CLASS IMPLEMENTATION <<<
Swap2::Swap2(int nkv) {
  _nkv = nkv;
#ifdef USE_TARGET_OFFLOAD
  vkv = (double *)aligned_alloc(64, _nkv * sizeof(double));
  vec_vkzm = (double *)aligned_alloc(64, _nkv * _nkv * sizeof(double));
#pragma omp parallel for collapse(2)
  for (int i = 0; i < _nkv; i++) {
    for (int j = 0; j < _nkv; j++) {
      vkv[i] = 0.0;
      vec_vkzm[_nkv * i +j] = 0.0;
    }
  }
#else
  vkv = new double[_nkv]();
  vec_vkzm = new double[_nkv * _nkv]();
  vkzm = new double*[nkv];
  for (int vi = 0; vi < _nkv; vi++) vkzm[vi] = vec_vkzm + vi * _nkv;
#endif // USE TARGET_OFFLOAD
  _last_vector = 0;
  _last_matrix = 0;
}

Swap2::~Swap2() {
#ifdef USE_TARGET_OFFLOAD
  free(vkv);
  free(vec_vkzm);
#else
  delete[] vkv;
  delete[] vec_vkzm;
  delete[] vkzm;
#endif // USE_TARGET_OFFLOAD
}

Swap2* Swap2::from_binary(const std::string& file_name, const std::string& mode) {
@@ -298,14 +316,14 @@ Swap2* Swap2::from_legacy(const std::string& file_name) {
  fstream input;
  Swap2 *instance = NULL;
  int fnkv, fnlmmt, fnrvc;
  double **fvkzm = NULL;
  double *fvkzm = NULL;
  double *fvkv = NULL;
  double value;
  input.open(file_name.c_str(), ios::in | ios::binary);
  if (input.is_open()) {
    input.read(reinterpret_cast<char *>(&fnkv), sizeof(int));
    instance = new Swap2(fnkv);
    fvkzm = instance->get_matrix();
    fvkzm = instance->vec_vkzm;
    fvkv = instance->get_vector();
    for (int vj = 0; vj < fnkv; vj++) {
      input.read(reinterpret_cast<char *>(&value), sizeof(double));
@@ -314,7 +332,7 @@ Swap2* Swap2::from_legacy(const std::string& file_name) {
    for (int mi = 0; mi < fnkv; mi++) {
      for (int mj = 0; mj < fnkv; mj++) {
	input.read(reinterpret_cast<char *>(&value), sizeof(double));
	fvkzm[mi][mj] = value;
	fvkzm[fnkv * mi + mj] = value;
      }
    }
    input.read(reinterpret_cast<char *>(&value), sizeof(double));
@@ -359,7 +377,7 @@ long Swap2::get_size(int nkv) {
void Swap2::push_matrix(double value) {
  int col = _last_matrix % (_nkv - 1);
  int row = _last_matrix - (_nkv * row);
  vkzm[row][col] = value;
  vec_vkzm[nkv * row + col] = value;
  _last_matrix++;
}

@@ -480,7 +498,7 @@ void Swap2::write_legacy(const std::string& file_name) {
    }
    for (int mi = 0; mi < _nkv; mi++) {
      for (int mj = 0; mj < _nkv; mj++) {
	value = vkzm[mi][mj];
	value = vec_vkzm[nkv * mi + mj];
	output.write(reinterpret_cast<const char*>(&value), sizeof(double));
      }
    }
@@ -552,8 +570,9 @@ bool Swap2::operator ==(Swap2 &other) {
    }
  }
  for (int mi = 0; mi < _nkv; mi++) {
    int nkvi = nkv * mi;
    for (int mj = 0; mj < _nkv; mj++) {
      if (vkzm[mi][mj] != other.vkzm[mi][mj]) {
      if (vec_vkzm[nkvi + mj] != other.vec_vkzm[nkvi + mj]) {
	return false;
      }
    }
@@ -580,22 +599,33 @@ TFRFME::TFRFME(int lmode, int lm, int nkv, int nxv, int nyv, int nzv) {
  _exril = 0.0;

  // Array initialization
  xv = new double[nxv]();
  yv = new double[nyv]();
  zv = new double[nzv]();
  _nlmmt = _lm * (_lm + 2) * 2;
  _nrvc = _nxv * _nyv * _nzv;
  vec_wsum = new dcomplex[nrvc * nlmmt]();
  wsum = new dcomplex*[nlmmt];
  for (int wi = 0; wi < nlmmt; wi++) wsum[wi] = vec_wsum + wi * nrvc;
#ifdef USE_TARGET_OFFLOAD
  xv = (double *)aligned_alloc(64, sizeof(double) * _nxv);
  yv = (double *)aligned_alloc(64, sizeof(double) * _nyv);
  zv = (double *)aligned_alloc(64, sizeof(double) * _nzv);
  vec_wsum = (dcomplex *)aligned_alloc(64, sizeof(dcomplex) * _nrvc * _nlmmt);
#else
  xv = new double[_nxv]();
  yv = new double[_nyv]();
  zv = new double[_nzv]();
  vec_wsum = new dcomplex[_nrvc * _nlmmt]();
#endif // USE_TARGET_OFFLOAD
}

TFRFME::~TFRFME() {
#ifdef USE_TARGET_OFFLOAD
  free(xv);
  free(yv);
  free(zv);
  free(vec_wsum);
#else
  delete[] xv;
  delete[] yv;
  delete[] zv;
  delete[] vec_wsum;
  delete[] wsum;
#endif
}

TFRFME* TFRFME::from_binary(const std::string& file_name, const std::string& mode) {
@@ -662,7 +692,7 @@ TFRFME* TFRFME::from_hdf5(const std::string& file_name) {
    for (int wj = 0; wj < nrvc; wj++) {
      for (int wi = 0; wi < nlmmt; wi++) {
	value = elements[index] + elements[index + 1] * I;
	instance->wsum[wi][wj] = value;
	instance->vec_wsum[nrvc * wi + wj] = value;
	index += 2;
      } // wi loop
    } // wj loop
@@ -727,7 +757,7 @@ TFRFME* TFRFME::from_legacy(const std::string& file_name) {
	input.read(reinterpret_cast<char *>(&rval), sizeof(double));
	input.read(reinterpret_cast<char *>(&ival), sizeof(double));
	dcomplex value = rval + ival * I;
	instance->wsum[wi][wj] = value;
	instance->vec_wsum[nrvc * wi + wj] = value;
      } // wi loop
    } // wj loop
    input.close();
@@ -842,8 +872,8 @@ void TFRFME::write_hdf5(const std::string& file_name) {
  int index = 0;
  for (int wj = 0; wj < nrvc; wj++) {
    for (int wi = 0; wi < nlmmt; wi++) {
      ptr_elements[index++] = real(wsum[wi][wj]);
      ptr_elements[index++] = imag(wsum[wi][wj]);
      ptr_elements[index++] = real(vec_wsum[nrvc * wi + wj]);
      ptr_elements[index++] = imag(vec_wsum[nrvc * wi + wj]);
    } // wi loop
  } // wj loop
  rec_ptr_list.append(ptr_elements);
@@ -891,8 +921,8 @@ void TFRFME::write_legacy(const std::string& file_name) {
      output.write(reinterpret_cast<char *>(&(zv[zi])), sizeof(double));
    for (int wj = 0; wj < _nrvc; wj++) {
      for (int wi = 0; wi < _nlmmt; wi++) {
	double rval = real(wsum[wi][wj]);
	double ival = imag(wsum[wi][wj]);
	double rval = real(vec_wsum[nrvc * wi + wj]);
	double ival = imag(vec_wsum[nrvc * wi + wj]);
	output.write(reinterpret_cast<char *>(&rval), sizeof(double));
	output.write(reinterpret_cast<char *>(&ival), sizeof(double));
      } // wi loop
@@ -962,8 +992,9 @@ bool TFRFME::operator ==(const TFRFME& other) {
    }
  }
  for (int wi = 0; wi < _nlmmt; wi++) {
    int i = _nrvc * wi;
    for (int wj = 0; wj < _nrvc; wj++) {
      if (wsum[wi][wj] != other.wsum[wi][wj]) {
      if (vec_wsum[i + wj] != other.vec_wsum[i + wj]) {
	return false;
      }
    } // wj loop
+3 −3
Original line number Diff line number Diff line
@@ -273,7 +273,7 @@ dcomplex *frfmer(
  const dcomplex cc0 = 0.0 + 0.0 * I;
  double sq = vkm * vkm;
  double *_vkv = tt2->get_vector();
  double **_vkzm = tt2->get_matrix();
  double *vec_vkzm = tt2->vec_vkzm;
  dcomplex *wk = new dcomplex[nlemt]();
  for (int jy90 = 0; jy90 < nkv; jy90++) {
    double vky = _vkv[jy90];
@@ -287,10 +287,10 @@ dcomplex *frfmer(
	double vkz = sqrt(sq - sqn);
	wamff(wk, vkx, vky, vkz, le, apfafa, tra, spd, rir, ftcn, lmode, pmf);
	for (int j = 0; j < nlemt; j++) tt1->append(wk[j]);
	_vkzm[jx80][jy90] = vkz;
	vec_vkzm[nkv * jx80 + jy90] = vkz;
      } else { // label 50
	for (int j = 0; j < nlemt; j++) tt1->append(cc0);
	_vkzm[jx80][jy90] = 0.0;
	vec_vkzm[nkv * jx80 + jy90] = 0.0;
      }
    } // jx80 loop
  } // jy90 loop
+485 −0

File added.

Preview size limit exceeded, changes collapsed.

+121 −245

File changed.

Preview size limit exceeded, changes collapsed.

Loading