Commit 4dfc33af authored by Giovanni La Mura
Browse files

Use aligned vector allocations when offloading to GPU

parent 57ac3b1a
Loading
Loading
Loading
Loading
+39 −4
Original line number Diff line number Diff line
@@ -44,6 +44,10 @@
#include "../include/file_io.h"
#endif

#include <cstddef>
#include <new>

#ifdef USE_TARGET_OFFLOAD
#include <cstdlib>
#endif

using namespace std;

// >>> START OF Swap1 CLASS IMPLEMENTATION <<<
@@ -220,15 +224,32 @@ bool Swap1::operator ==(Swap1 &other) {
// >>> START OF Swap2 CLASS IMPLEMENTATION <<<
// Construct a Swap2 that owns two zero-filled buffers:
//   - vkv:      nkv doubles
//   - vec_vkzm: nkv * nkv doubles, addressed as vec_vkzm[nkv * i + j]
//
// When USE_TARGET_OFFLOAD is defined the buffers are obtained from
// aligned_alloc() with 64-byte alignment and must be released with free();
// otherwise they come from new[] and must be released with delete[].
//
// \param nkv: `int` Length of vkv and side length of the vec_vkzm matrix.
// \throws std::bad_alloc if an aligned allocation fails (offload build).
Swap2::Swap2(int nkv) {
  _nkv = nkv;
  // Widen to size_t before multiplying: `_nkv * _nkv` evaluated in int
  // arithmetic overflows as soon as nkv exceeds 46340.
  const size_t vec_len = (size_t)_nkv;
  const size_t mat_len = vec_len * vec_len;
#ifdef USE_TARGET_OFFLOAD
  const size_t alignment = 64;
  // aligned_alloc() is only guaranteed to succeed when the requested size is
  // an integral multiple of the alignment, so pad every request up to the
  // next multiple (and never request zero bytes).
  auto padded_bytes = [alignment](size_t bytes) -> size_t {
    const size_t blocks = (bytes + alignment - 1) / alignment;
    return (blocks == 0 ? 1 : blocks) * alignment;
  };
  vkv = (double *)aligned_alloc(alignment, padded_bytes(vec_len * sizeof(double)));
  vec_vkzm = (double *)aligned_alloc(alignment, padded_bytes(mat_len * sizeof(double)));
  if (vkv == NULL || vec_vkzm == NULL) {
    // Do not leak whichever of the two allocations succeeded.
    free(vkv);
    free(vec_vkzm);
    vkv = NULL;
    vec_vkzm = NULL;
    throw std::bad_alloc();
  }
  // Zero the vector on its own: writing vkv[i] from inside a collapsed
  // (i, j) loop nest makes several threads store to the same element and
  // repeats every store nkv times.
  for (size_t i = 0; i < vec_len; i++) {
    vkv[i] = 0.0;
  }
  // The matrix is contiguous, so a single flat loop covers every element
  // exactly once and each iteration touches a distinct location.
#pragma omp parallel for
  for (size_t ij = 0; ij < mat_len; ij++) {
    vec_vkzm[ij] = 0.0;
  }
#else
  vkv = new double[vec_len]();
  vec_vkzm = new double[mat_len]();
#endif // USE_TARGET_OFFLOAD
  _last_vector = 0;
  _last_matrix = 0;
}

// Destroy a Swap2, handing its two buffers back through the deallocator that
// pairs with the allocator chosen in the constructor: delete[] for the new[]
// build, free() for the aligned_alloc() build.
Swap2::~Swap2() {
#ifndef USE_TARGET_OFFLOAD
  delete[] vec_vkzm;
  delete[] vkv;
#else
  free(vec_vkzm);
  free(vkv);
#endif // USE_TARGET_OFFLOAD
}

Swap2* Swap2::from_binary(const std::string& file_name, const std::string& mode) {
@@ -578,19 +599,33 @@ TFRFME::TFRFME(int lmode, int lm, int nkv, int nxv, int nyv, int nzv) {
  _exril = 0.0;

  // Array initialization
  xv = new double[nxv]();
  yv = new double[nyv]();
  zv = new double[nzv]();
  _nlmmt = _lm * (_lm + 2) * 2;
  _nrvc = _nxv * _nyv * _nzv;
  vec_wsum = new dcomplex[nrvc * nlmmt]();
#ifdef USE_TARGET_OFFLOAD
  xv = (double *)aligned_alloc(64, sizeof(double) * _nxv);
  yv = (double *)aligned_alloc(64, sizeof(double) * _nyv);
  zv = (double *)aligned_alloc(64, sizeof(double) * _nzv);
  vec_wsum = (dcomplex *)aligned_alloc(64, sizeof(dcomplex) * _nrvc * _nlmmt);
#else
  xv = new double[_nxv]();
  yv = new double[_nyv]();
  zv = new double[_nzv]();
  vec_wsum = new dcomplex[_nrvc * _nlmmt]();
#endif // USE_TARGET_OFFLOAD
}

// Destroy a TFRFME, releasing the coordinate arrays (xv, yv, zv) and the
// vec_wsum buffer. Storage obtained with new[] is released with delete[];
// storage obtained with aligned_alloc() is released with free().
TFRFME::~TFRFME() {
#ifndef USE_TARGET_OFFLOAD
  delete[] vec_wsum;
  delete[] zv;
  delete[] yv;
  delete[] xv;
#else
  free(vec_wsum);
  free(zv);
  free(yv);
  free(xv);
#endif // USE_TARGET_OFFLOAD
}

TFRFME* TFRFME::from_binary(const std::string& file_name, const std::string& mode) {