Commit 57ac3b1a authored by Giovanni La Mura
Browse files

Make TFRFME::vec_wsum directly accessible

parent 0becb9e3
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -342,8 +342,6 @@ protected:
  double *yv;
  //! Vector of computed z positions
  double *zv;
  //! QUESTION: definition?
  dcomplex *vec_wsum;

  /*! \brief Load a configuration instance from a HDF5 binary file.
   *
@@ -407,7 +405,7 @@ public:
  //! QUESTION: definition?
  const double& exril = _exril;
  //! QUESTION: definition?
  dcomplex **wsum;
  dcomplex *vec_wsum;
  
  /*! \brief Trapping configuration instance constructor.
   *
+8 −10
Original line number Diff line number Diff line
@@ -584,8 +584,6 @@ TFRFME::TFRFME(int lmode, int lm, int nkv, int nxv, int nyv, int nzv) {
  _nlmmt = _lm * (_lm + 2) * 2;
  _nrvc = _nxv * _nyv * _nzv;
  vec_wsum = new dcomplex[nrvc * nlmmt]();
  wsum = new dcomplex*[nlmmt];
  for (int wi = 0; wi < nlmmt; wi++) wsum[wi] = vec_wsum + wi * nrvc;
}

TFRFME::~TFRFME() {
@@ -593,7 +591,6 @@ TFRFME::~TFRFME() {
  delete[] yv;
  delete[] zv;
  delete[] vec_wsum;
  delete[] wsum;
}

TFRFME* TFRFME::from_binary(const std::string& file_name, const std::string& mode) {
@@ -660,7 +657,7 @@ TFRFME* TFRFME::from_hdf5(const std::string& file_name) {
    for (int wj = 0; wj < nrvc; wj++) {
      for (int wi = 0; wi < nlmmt; wi++) {
	value = elements[index] + elements[index + 1] * I;
	instance->wsum[wi][wj] = value;
	instance->vec_wsum[nrvc * wi + wj] = value;
	index += 2;
      } // wi loop
    } // wj loop
@@ -725,7 +722,7 @@ TFRFME* TFRFME::from_legacy(const std::string& file_name) {
	input.read(reinterpret_cast<char *>(&rval), sizeof(double));
	input.read(reinterpret_cast<char *>(&ival), sizeof(double));
	dcomplex value = rval + ival * I;
	instance->wsum[wi][wj] = value;
	instance->vec_wsum[nrvc * wi + wj] = value;
      } // wi loop
    } // wj loop
    input.close();
@@ -840,8 +837,8 @@ void TFRFME::write_hdf5(const std::string& file_name) {
  int index = 0;
  for (int wj = 0; wj < nrvc; wj++) {
    for (int wi = 0; wi < nlmmt; wi++) {
      ptr_elements[index++] = real(wsum[wi][wj]);
      ptr_elements[index++] = imag(wsum[wi][wj]);
      ptr_elements[index++] = real(vec_wsum[nrvc * wi + wj]);
      ptr_elements[index++] = imag(vec_wsum[nrvc * wi + wj]);
    } // wi loop
  } // wj loop
  rec_ptr_list.append(ptr_elements);
@@ -889,8 +886,8 @@ void TFRFME::write_legacy(const std::string& file_name) {
      output.write(reinterpret_cast<char *>(&(zv[zi])), sizeof(double));
    for (int wj = 0; wj < _nrvc; wj++) {
      for (int wi = 0; wi < _nlmmt; wi++) {
	double rval = real(wsum[wi][wj]);
	double ival = imag(wsum[wi][wj]);
	double rval = real(vec_wsum[nrvc * wi + wj]);
	double ival = imag(vec_wsum[nrvc * wi + wj]);
	output.write(reinterpret_cast<char *>(&rval), sizeof(double));
	output.write(reinterpret_cast<char *>(&ival), sizeof(double));
      } // wi loop
@@ -960,8 +957,9 @@ bool TFRFME::operator ==(const TFRFME& other) {
    }
  }
  for (int wi = 0; wi < _nlmmt; wi++) {
    int i = _nrvc * wi;
    for (int wj = 0; wj < _nrvc; wj++) {
      if (wsum[wi][wj] != other.wsum[wi][wj]) {
      if (vec_wsum[i + wj] != other.vec_wsum[i + wj]) {
	return false;
      }
    } // wj loop
+9 −2
Original line number Diff line number Diff line
@@ -66,6 +66,8 @@
#endif

#ifdef USE_TARGET_OFFLOAD
#include <cstdlib>

/*! \brief Specialized function to perform GPU-offloaded trapping loop.
 *
 * The offload of GPU operations through interface layers, such as OpenMP,
@@ -461,9 +463,13 @@ void frfme(string data_file, string output_path) {
	  int size_global_vec_w = nkvs * (jlml - jlmf + 1);
	  int size_vec_tt1_wk = nkvs * nlmmt;
	  const dcomplex *vec_tt1_wk = tt1->wk;
	  dcomplex *vec_wsum = tfrfme->wsum[0];
	  dcomplex *vec_wsum = tfrfme->vec_wsum;
	  double *vec_vkzm = tt2->vec_vkzm;
#ifdef USE_TARGET_OFFLOAD
	  dcomplex *global_vec_w = (dcomplex *)aligned_alloc(64, size_global_vec_w * sizeof(dcomplex));
#else
	  dcomplex *global_vec_w = new dcomplex[size_global_vec_w]();
#endif // USE_TARGET_OFFLOAD
	  message = "INFO: looping over " + to_string(jlml - jlmf + 1) + " J iterations.\n";
	  logger.log(message);
#ifdef USE_TARGET_OFFLOAD
@@ -490,6 +496,7 @@ void frfme(string data_file, string output_path) {
	  sprintf(buffer, "INFO: loop calculation took %lfs.\n", elapsed.count());
	  message = string(buffer);
	  logger.log(message);
	  free(global_vec_w);
#else
#pragma omp parallel for
	  for (int j80 = jlmf - 1; j80 < jlml; j80++) {
@@ -550,8 +557,8 @@ void frfme(string data_file, string output_path) {
	      vec_wsum[(j80 * nrvc) + ixyz] = sumy * delks;
	    } // ixyz loop
	  } // j80 loop
#endif // USE_TARGET_OFFLOAD
	  delete[] global_vec_w;
#endif // USE_TARGET_OFFLOAD
#ifdef USE_NVTX
	  nvtxRangePop();
#endif
+2 −1
Original line number Diff line number Diff line
@@ -303,6 +303,7 @@ void lffft(string data_file, string output_path) {
	  // label 160
	  const int nlmm = lm * (lm + 2);
	  const int nlmmt = nlmm + nlmm;
	  const int nrvc = nxv * nyv * nzv;
	  ws = new dcomplex[nlmmt]();
	  if (lm > le) wsl = new dcomplex[nlmmt]();
	  // FORTRAN writes two output formatted files without opening them
@@ -320,7 +321,7 @@ void lffft(string data_file, string output_path) {
		  //binary_input.read(reinterpret_cast<char *>(&vimag), sizeof(double));
		  int row = i;
		  int col = (nyv * nxv * iz475) + (nxv * iy475) + ix475;
		  dcomplex value = tfrfme->wsum[row][col];
		  dcomplex value = tfrfme->vec_wsum[nrvc * row + col];
		  if (lm <= le) {
		    ws[i] = value;
		  } else { // label 170