Loading src/include/tfrfme.h +1 −3 Original line number Diff line number Diff line Loading @@ -342,8 +342,6 @@ protected: double *yv; //! Vector of computed z positions double *zv; //! QUESTION: definition? dcomplex *vec_wsum; /*! \brief Load a configuration instance from a HDF5 binary file. * Loading Loading @@ -407,7 +405,7 @@ public: //! QUESTION: definition? const double& exril = _exril; //! QUESTION: definition? dcomplex **wsum; dcomplex *vec_wsum; /*! \brief Trapping configuration instance constructor. * Loading src/libnptm/tfrfme.cpp +8 −10 Original line number Diff line number Diff line Loading @@ -584,8 +584,6 @@ TFRFME::TFRFME(int lmode, int lm, int nkv, int nxv, int nyv, int nzv) { _nlmmt = _lm * (_lm + 2) * 2; _nrvc = _nxv * _nyv * _nzv; vec_wsum = new dcomplex[nrvc * nlmmt](); wsum = new dcomplex*[nlmmt]; for (int wi = 0; wi < nlmmt; wi++) wsum[wi] = vec_wsum + wi * nrvc; } TFRFME::~TFRFME() { Loading @@ -593,7 +591,6 @@ TFRFME::~TFRFME() { delete[] yv; delete[] zv; delete[] vec_wsum; delete[] wsum; } TFRFME* TFRFME::from_binary(const std::string& file_name, const std::string& mode) { Loading Loading @@ -660,7 +657,7 @@ TFRFME* TFRFME::from_hdf5(const std::string& file_name) { for (int wj = 0; wj < nrvc; wj++) { for (int wi = 0; wi < nlmmt; wi++) { value = elements[index] + elements[index + 1] * I; instance->wsum[wi][wj] = value; instance->vec_wsum[nrvc * wi + wj] = value; index += 2; } // wi loop } // wj loop Loading Loading @@ -725,7 +722,7 @@ TFRFME* TFRFME::from_legacy(const std::string& file_name) { input.read(reinterpret_cast<char *>(&rval), sizeof(double)); input.read(reinterpret_cast<char *>(&ival), sizeof(double)); dcomplex value = rval + ival * I; instance->wsum[wi][wj] = value; instance->vec_wsum[nrvc * wi + wj] = value; } // wi loop } // wj loop input.close(); Loading Loading @@ -840,8 +837,8 @@ void TFRFME::write_hdf5(const std::string& file_name) { int index = 0; for (int wj = 0; wj < nrvc; wj++) { for (int wi = 0; wi < nlmmt; wi++) { ptr_elements[index++] = real(wsum[wi][wj]); ptr_elements[index++] = imag(wsum[wi][wj]); ptr_elements[index++] = real(vec_wsum[nrvc * wi + wj]); ptr_elements[index++] = imag(vec_wsum[nrvc * wi + wj]); } // wi loop } // wj loop rec_ptr_list.append(ptr_elements); Loading Loading @@ -889,8 +886,8 @@ void TFRFME::write_legacy(const std::string& file_name) { output.write(reinterpret_cast<char *>(&(zv[zi])), sizeof(double)); for (int wj = 0; wj < _nrvc; wj++) { for (int wi = 0; wi < _nlmmt; wi++) { double rval = real(wsum[wi][wj]); double ival = imag(wsum[wi][wj]); double rval = real(vec_wsum[nrvc * wi + wj]); double ival = imag(vec_wsum[nrvc * wi + wj]); output.write(reinterpret_cast<char *>(&rval), sizeof(double)); output.write(reinterpret_cast<char *>(&ival), sizeof(double)); } // wi loop Loading Loading @@ -960,8 +957,9 @@ bool TFRFME::operator ==(const TFRFME& other) { } } for (int wi = 0; wi < _nlmmt; wi++) { int i = _nrvc * wi; for (int wj = 0; wj < _nrvc; wj++) { if (wsum[wi][wj] != other.wsum[wi][wj]) { if (vec_wsum[i + wj] != other.vec_wsum[i + wj]) { return false; } } // wj loop Loading src/trapping/cfrfme.cpp +9 −2 Original line number Diff line number Diff line Loading @@ -66,6 +66,8 @@ #endif #ifdef USE_TARGET_OFFLOAD #include <cstdlib> /*! \brief Specialized function to perform GPU-offloaded trapping loop. * * The offload of GPU operations through interface layers, such as OpenMP, Loading Loading @@ -461,9 +463,13 @@ void frfme(string data_file, string output_path) { int size_global_vec_w = nkvs * (jlml - jlmf + 1); int size_vec_tt1_wk = nkvs * nlmmt; const dcomplex *vec_tt1_wk = tt1->wk; dcomplex *vec_wsum = tfrfme->wsum[0]; dcomplex *vec_wsum = tfrfme->vec_wsum; double *vec_vkzm = tt2->vec_vkzm; #ifdef USE_TARGET_OFFLOAD dcomplex *global_vec_w = (dcomplex *)aligned_alloc(64, size_global_vec_w * sizeof(dcomplex)); #else dcomplex *global_vec_w = new dcomplex[size_global_vec_w](); #endif // USE_TARGET_OFFLOAD message = "INFO: looping over " + to_string(jlml - jlmf + 1) + " J iterations.\n"; logger.log(message); #ifdef USE_TARGET_OFFLOAD Loading @@ -490,6 +496,7 @@ void frfme(string data_file, string output_path) { sprintf(buffer, "INFO: loop calculation took %lfs.\n", elapsed.count()); message = string(buffer); logger.log(message); free(global_vec_w); #else #pragma omp parallel for for (int j80 = jlmf - 1; j80 < jlml; j80++) { Loading Loading @@ -550,8 +557,8 @@ void frfme(string data_file, string output_path) { vec_wsum[(j80 * nrvc) + ixyz] = sumy * delks; } // ixyz loop } // j80 loop #endif // USE_TARGET_OFFLOAD delete[] global_vec_w; #endif // USE_TARGET_OFFLOAD #ifdef USE_NVTX nvtxRangePop(); #endif Loading src/trapping/clffft.cpp +2 −1 Original line number Diff line number Diff line Loading @@ -303,6 +303,7 @@ void lffft(string data_file, string output_path) { // label 160 const int nlmm = lm * (lm + 2); const int nlmmt = nlmm + nlmm; const int nrvc = nxv * nyv * nzv; ws = new dcomplex[nlmmt](); if (lm > le) wsl = new dcomplex[nlmmt](); // FORTRAN writes two output formatted files without opening them Loading @@ -320,7 +321,7 @@ void lffft(string data_file, string output_path) { //binary_input.read(reinterpret_cast<char *>(&vimag), sizeof(double)); int row = i; int col = (nyv * nxv * iz475) + (nxv * iy475) + ix475; dcomplex value = tfrfme->wsum[row][col]; dcomplex value = tfrfme->vec_wsum[nrvc * row + col]; if (lm <= le) { ws[i] = value; } else { // label 170 Loading Loading
src/include/tfrfme.h +1 −3 Original line number Diff line number Diff line Loading @@ -342,8 +342,6 @@ protected: double *yv; //! Vector of computed z positions double *zv; //! QUESTION: definition? dcomplex *vec_wsum; /*! \brief Load a configuration instance from a HDF5 binary file. * Loading Loading @@ -407,7 +405,7 @@ public: //! QUESTION: definition? const double& exril = _exril; //! QUESTION: definition? dcomplex **wsum; dcomplex *vec_wsum; /*! \brief Trapping configuration instance constructor. * Loading
src/libnptm/tfrfme.cpp +8 −10 Original line number Diff line number Diff line Loading @@ -584,8 +584,6 @@ TFRFME::TFRFME(int lmode, int lm, int nkv, int nxv, int nyv, int nzv) { _nlmmt = _lm * (_lm + 2) * 2; _nrvc = _nxv * _nyv * _nzv; vec_wsum = new dcomplex[nrvc * nlmmt](); wsum = new dcomplex*[nlmmt]; for (int wi = 0; wi < nlmmt; wi++) wsum[wi] = vec_wsum + wi * nrvc; } TFRFME::~TFRFME() { Loading @@ -593,7 +591,6 @@ TFRFME::~TFRFME() { delete[] yv; delete[] zv; delete[] vec_wsum; delete[] wsum; } TFRFME* TFRFME::from_binary(const std::string& file_name, const std::string& mode) { Loading Loading @@ -660,7 +657,7 @@ TFRFME* TFRFME::from_hdf5(const std::string& file_name) { for (int wj = 0; wj < nrvc; wj++) { for (int wi = 0; wi < nlmmt; wi++) { value = elements[index] + elements[index + 1] * I; instance->wsum[wi][wj] = value; instance->vec_wsum[nrvc * wi + wj] = value; index += 2; } // wi loop } // wj loop Loading Loading @@ -725,7 +722,7 @@ TFRFME* TFRFME::from_legacy(const std::string& file_name) { input.read(reinterpret_cast<char *>(&rval), sizeof(double)); input.read(reinterpret_cast<char *>(&ival), sizeof(double)); dcomplex value = rval + ival * I; instance->wsum[wi][wj] = value; instance->vec_wsum[nrvc * wi + wj] = value; } // wi loop } // wj loop input.close(); Loading Loading @@ -840,8 +837,8 @@ void TFRFME::write_hdf5(const std::string& file_name) { int index = 0; for (int wj = 0; wj < nrvc; wj++) { for (int wi = 0; wi < nlmmt; wi++) { ptr_elements[index++] = real(wsum[wi][wj]); ptr_elements[index++] = imag(wsum[wi][wj]); ptr_elements[index++] = real(vec_wsum[nrvc * wi + wj]); ptr_elements[index++] = imag(vec_wsum[nrvc * wi + wj]); } // wi loop } // wj loop rec_ptr_list.append(ptr_elements); Loading Loading @@ -889,8 +886,8 @@ void TFRFME::write_legacy(const std::string& file_name) { output.write(reinterpret_cast<char *>(&(zv[zi])), sizeof(double)); for (int wj = 0; wj < _nrvc; wj++) { for (int wi = 0; wi < _nlmmt; wi++) { double rval = real(wsum[wi][wj]); double ival = imag(wsum[wi][wj]); double rval = real(vec_wsum[nrvc * wi + wj]); double ival = imag(vec_wsum[nrvc * wi + wj]); output.write(reinterpret_cast<char *>(&rval), sizeof(double)); output.write(reinterpret_cast<char *>(&ival), sizeof(double)); } // wi loop Loading Loading @@ -960,8 +957,9 @@ bool TFRFME::operator ==(const TFRFME& other) { } } for (int wi = 0; wi < _nlmmt; wi++) { int i = _nrvc * wi; for (int wj = 0; wj < _nrvc; wj++) { if (wsum[wi][wj] != other.wsum[wi][wj]) { if (vec_wsum[i + wj] != other.vec_wsum[i + wj]) { return false; } } // wj loop Loading
src/trapping/cfrfme.cpp +9 −2 Original line number Diff line number Diff line Loading @@ -66,6 +66,8 @@ #endif #ifdef USE_TARGET_OFFLOAD #include <cstdlib> /*! \brief Specialized function to perform GPU-offloaded trapping loop. * * The offload of GPU operations through interface layers, such as OpenMP, Loading Loading @@ -461,9 +463,13 @@ void frfme(string data_file, string output_path) { int size_global_vec_w = nkvs * (jlml - jlmf + 1); int size_vec_tt1_wk = nkvs * nlmmt; const dcomplex *vec_tt1_wk = tt1->wk; dcomplex *vec_wsum = tfrfme->wsum[0]; dcomplex *vec_wsum = tfrfme->vec_wsum; double *vec_vkzm = tt2->vec_vkzm; #ifdef USE_TARGET_OFFLOAD dcomplex *global_vec_w = (dcomplex *)aligned_alloc(64, size_global_vec_w * sizeof(dcomplex)); #else dcomplex *global_vec_w = new dcomplex[size_global_vec_w](); #endif // USE_TARGET_OFFLOAD message = "INFO: looping over " + to_string(jlml - jlmf + 1) + " J iterations.\n"; logger.log(message); #ifdef USE_TARGET_OFFLOAD Loading @@ -490,6 +496,7 @@ void frfme(string data_file, string output_path) { sprintf(buffer, "INFO: loop calculation took %lfs.\n", elapsed.count()); message = string(buffer); logger.log(message); free(global_vec_w); #else #pragma omp parallel for for (int j80 = jlmf - 1; j80 < jlml; j80++) { Loading Loading @@ -550,8 +557,8 @@ void frfme(string data_file, string output_path) { vec_wsum[(j80 * nrvc) + ixyz] = sumy * delks; } // ixyz loop } // j80 loop #endif // USE_TARGET_OFFLOAD delete[] global_vec_w; #endif // USE_TARGET_OFFLOAD #ifdef USE_NVTX nvtxRangePop(); #endif Loading
src/trapping/clffft.cpp +2 −1 Original line number Diff line number Diff line Loading @@ -303,6 +303,7 @@ void lffft(string data_file, string output_path) { // label 160 const int nlmm = lm * (lm + 2); const int nlmmt = nlmm + nlmm; const int nrvc = nxv * nyv * nzv; ws = new dcomplex[nlmmt](); if (lm > le) wsl = new dcomplex[nlmmt](); // FORTRAN writes two output formatted files without opening them Loading @@ -320,7 +321,7 @@ void lffft(string data_file, string output_path) { //binary_input.read(reinterpret_cast<char *>(&vimag), sizeof(double)); int row = i; int col = (nyv * nxv * iz475) + (nxv * iy475) + ix475; dcomplex value = tfrfme->wsum[row][col]; dcomplex value = tfrfme->vec_wsum[nrvc * row + col]; if (lm <= le) { ws[i] = value; } else { // label 170 Loading