Commit 120a5b3a authored by Giovanni La Mura
Browse files

Merge branch 'offload_trapping' into 'master'

Offload trapping

See merge request giacomo.mulas/np_tmcode!98
parents 25e0acf4 3af28bf7
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -364,8 +364,8 @@ result=$?
if [ "x$result" = "x0" ]; then
    CLANGFLAGS=" -stdlib=libstdc++"
fi
echo -n "configure: checking wether $CXX works... "
echo -n "configure: checking wether $CXX works... " >>configure.log
echo -n "configure: checking whether $CXX works... "
echo -n "configure: checking whether $CXX works... " >>configure.log
cat > test_compiler.cpp <<EOF
int main() {
  int i = -1;
@@ -385,8 +385,8 @@ else
    echo "ERROR: $CXX is not a working C++ compiler!" >>configure.log
    exit 2
fi
echo -n "configure: checking wether $CXX supports -ggdb... "
echo -n "configure: checking wether $CXX supports -ggdb... " >>configure.log
echo -n "configure: checking whether $CXX supports -ggdb... "
echo -n "configure: checking whether $CXX supports -ggdb... " >>configure.log
$CXX $CLANGFLAGS -ggdb test_compiler.cpp -o test_compiler > /dev/null 2>>error.log
result=$?
if [ "x$result" = "x0" ]; then
@@ -410,8 +410,8 @@ else
    echo "no"
    echo "no" >>configure.log
fi
echo -n "configure: checking wether $CXX is a MPI compiler... "
echo -n "configure: checking wether $CXX is a MPI compiler... " >>configure.log
echo -n "configure: checking whether $CXX is a MPI compiler... "
echo -n "configure: checking whether $CXX is a MPI compiler... " >>configure.log
cat > test_compiler.cpp <<EOF
# include <mpi.h>
int main() {
+18 −18
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@ class Swap1 {
protected:
  //! Index of the last element to be filled.
  int _last_index;
  //! Number of vector coordinates. QUESTION: correct?
  //! Number of beam description wave numbers.
  int _nkv;
  //! NLMMT = 2 * LM * (LM + 2)
  int _nlmmt;
@@ -69,7 +69,7 @@ public:
  /*! \brief Swap1 instance constructor.
   *
   * \param lm: `int` Maximum field expansion order.
   * \param _nkv: `int` Number of vector coordinates. QUESTION: correct?
   * \param nkv: `int` Number of beam description wave numbers.
   */
  Swap1(int lm, int nkv);

@@ -130,7 +130,7 @@ protected:
  int _last_vector;
  //! Index of the last matrix element to be filled.
  int _last_matrix;
  //! Number of vector coordinates. QUESTION: correct?
  //! Number of beam description wave numbers.
  int _nkv;
  //! Contiguous vector of VKZM matrix.
  double *vec_vkzm;
@@ -152,13 +152,13 @@ protected:
  double _delxyz;
  //! QUESTION: definition?
  double _vknmx;
  //! QUESTION: definition?
  //! Wave number grid spacing.
  double _delk;
  //! QUESTION: definition?
  //! Square of wave number grid spacing.
  double _delks;
  //! NLMMT = LM * (LM + 2) * 2
  int _nlmmt;
  //! Number of radial vector coordinates. QUESTION: correct?
  //! Number of radial vector coordinates.
  int _nrvc;

  /*! \brief Load a Swap2 instance from a HDF5 binary file.
@@ -192,7 +192,7 @@ public:
  const int &last_vector = _last_vector;
  //! Read-only view on the index of the last matrix element to be filled.
  const int &last_matrix = _last_matrix;
  //! Read-only view on the number of vector coordinates. QUESTION: correct?
  //! Read-only view on the number of beam description wave numbers.
  const int &nkv = _nkv;
  //! QUESTION: definition?
  double *vkv;
@@ -222,12 +222,12 @@ public:
  const double &delks = _delks;
  //! NLMMT = LM * (LM + 2) * 2
  const int &nlmmt = _nlmmt;
  //! Number of radial vector coordinates. QUESTION: correct?
  //! Read-only view on the number of radial vector coordinates.
  const int &nrvc = _nrvc;

  /*! \brief Swap2 instance constructor.
   *
   * \param nkv: `int` Number of vector coordinates. QUESTION: correct?
   * \param nkv: `int` Number of beam description wave numbers.
   */
  Swap2(int nkv);

@@ -252,7 +252,7 @@ public:

  /*! \brief Calculate the necessary amount of memory to create a new instance.
   *
   * \param nkv: `int` Number of radial vector coordinates. QUESTION: correct?
   * \param nkv: `int` Number of beam description wave numbers.
   * \return size: `long` The necessary memory size in bytes.
   */
  static long get_size(int nkv);
@@ -316,11 +316,11 @@ protected:
  int _nlmmt;
  //! NRVC = NXV * NYV * NZV
  int _nrvc;
  //! Field expansion mode identifier.
  //! Beam description mode.
  int _lmode;
  //! Maximum field expansion order.
  int _lm;
  //! QUESTION: definition?
  //! Number of beam description wave numbers.
  int _nkv;
  //! Number of computed X coordinates.
  int _nxv;
@@ -332,11 +332,11 @@ protected:
  double _vk;
  //! External medium refractive index
  double _exri;
  //! QUESTION: definition?
  //! Numerical aperture.
  double _an;
  //! QUESTION: definition?
  //! Filling factor.
  double _ff;
  //! QUESTION: definition?
  //! Lens transmission.
  double _tra;
  //! QUESTION: definition?
  double _spd;
@@ -402,11 +402,11 @@ public:
  const double& vk = _vk;
  //! Read-only view on external medium refractive index
  const double& exri = _exri;
  //! QUESTION: definition?
  //! Read-only view on numerical aperture.
  const double& an = _an;
  //! QUESTION: definition?
  //! Read-only view on filling factor.
  const double& ff = _ff;
  //! QUESTION: definition?
  //! Read-only view on lens transmission.
  const double& tra = _tra;
  //! QUESTION: definition?
  const double& spd = _spd;
+4 −2
Original line number Diff line number Diff line
@@ -340,13 +340,15 @@ GeometryConfiguration* GeometryConfiguration::from_legacy(const std::string& fil
  // Read optional configuration data used only by the C++ code.
  while (num_lines > last_read_line) {
    str_target = file_lines[last_read_line++];
    if (str_target.size() > 0) {
    if (str_target.size() > 15) {
      if (str_target.substr(0, 15).compare("USE_REFINEMENT=") == 0) {
	regex_search(str_target, m, re);
	short refine_flag = (short)stoi(m.str());
	conf->_refine_flag = refine_flag;
      }
      else if (str_target.substr(0, 14).compare("USE_DYN_ORDER=") == 0) {
    }
    if (str_target.size() > 14) {
      if (str_target.substr(0, 14).compare("USE_DYN_ORDER=") == 0) {
	regex_search(str_target, m, re);
	short dyn_order_flag = (short)stoi(m.str());
	conf->_dyn_order_flag = dyn_order_flag;
+115 −102
Original line number Diff line number Diff line
@@ -47,10 +47,6 @@
#include <omp.h>
#endif

#ifdef USE_TARGET_OFFLOAD
#pragma omp requires unified_shared_memory
#endif

using namespace std;

void apc(
@@ -407,9 +403,9 @@ dcomplex cdtp(dcomplex z, dcomplex **am, int i, int jf, int k, int nj) {
  return result;
}

#ifdef USE_TARGET_OFFLOAD
#pragma omp begin declare target device_type(any)
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp begin declare target device_type(any)
// #endif
double cgev(int ipamo, int mu, int l, int m) {
  double result = 0.0;
  double xd = 0.0, xn = 0.0;
@@ -443,9 +439,9 @@ double cgev(int ipamo, int mu, int l, int m) {
  }
  return result;
}
#ifdef USE_TARGET_OFFLOAD
#pragma omp end declare target
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp end declare target
// #endif

void cms(dcomplex **am, ParticleDescriptor *c1) {
  dcomplex dm, de, cgh, cgk;
@@ -649,9 +645,9 @@ void crsm1(double vk, double exri, ParticleDescriptor *c1) {
  delete[] svs;
}

#ifdef USE_TARGET_OFFLOAD
#pragma omp begin declare target device_type(any)
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp begin declare target device_type(any)
// #endif
dcomplex ghit_d(
	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2,
	      ParticleDescriptor *c1, double *rac3j
@@ -862,13 +858,13 @@ dcomplex ghit_d(
  }
  return result;
}
#ifdef USE_TARGET_OFFLOAD
#pragma omp end declare target
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp end declare target
// #endif

#ifdef USE_TARGET_OFFLOAD
#pragma omp begin declare target device_type(any)
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp begin declare target device_type(any)
// #endif
dcomplex ghit(
	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2,
	      ParticleDescriptor *c1
@@ -1079,9 +1075,9 @@ dcomplex ghit(
  }
  return result;
}
#ifdef USE_TARGET_OFFLOAD
#pragma omp end declare target
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp end declare target
// #endif

void hjv(
	 double exri, double vk, int &jer, int &lcalc, dcomplex &arg,
@@ -1339,11 +1335,12 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) {
#ifdef USE_NVTX
  nvtxRangePush("pcros intermediate loop 1");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)
// #else
// #pragma omp parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)
// #endif
#pragma omp parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)
#endif
  for (int i12 = 0; i12 < nlemt; i12++) {
      // int i = i12 - 1;
      dcomplex am = cc0;
@@ -1408,11 +1405,12 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) {
  csam = -(ccs / (exri * vk)) * 0.5 * I;
  sum2 = cc0;
  sum3 = cc0;
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3)
// #else
// #pragma omp parallel for simd reduction(+:sum2,sum3)
// #endif
#pragma omp parallel for simd reduction(+:sum2,sum3)
#endif
  for (int i14 = 0; i14 < c1->nlem; i14++) { 
    int ie = i14 + c1->nlem;
    sum2 += (vec_am0m[nlemt*i14 + i14] + vec_am0m[nlemt*ie + ie]);
@@ -1420,11 +1418,12 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) {
  } // i14 loop
  double sumpi = 0.0;
  dcomplex sumpd = cc0;
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd)
// #else
// #pragma omp parallel for simd collapse(2) reduction(+:sumpi,sumpd)
// #endif
#pragma omp parallel for simd collapse(2) reduction(+:sumpi,sumpd)
#endif
  for (int i16 = 0; i16 < nlemt; i16++) {
    for (int j16 = 0; j16 < c1->nlem; j16++) {
      int je = j16 + c1->nlem;
@@ -1628,9 +1627,9 @@ void r3j000(int j2, int j3, double *rac3j) {
  }
}

#ifdef USE_TARGET_OFFLOAD
#pragma omp begin declare target device_type(any)
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp begin declare target device_type(any)
// #endif
void r3jjr(int j2, int j3, int m2, int m3, double *rac3j) {
  int jmx = j3 + j2;
  int jdf = j3 - j2;
@@ -1748,13 +1747,13 @@ void r3jjr(int j2, int j3, int m2, int m3, double *rac3j) {
    }
  }
}
#ifdef USE_TARGET_OFFLOAD
#pragma omp end declare target
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp end declare target
// #endif

#ifdef USE_TARGET_OFFLOAD
#pragma omp begin declare target device_type(any)
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp begin declare target device_type(any)
// #endif
void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) {
  int jmx = j3 + j2;
  int jdf = j3 - j2;
@@ -1872,9 +1871,9 @@ void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) {
    }
  }
}
#ifdef USE_TARGET_OFFLOAD
#pragma omp end declare target
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp end declare target
// #endif

void r3jmr(int j1, int j2, int j3, int m1, double *rac3j) {
  int mmx = (j2 < j3 - m1) ? j2 : j3 - m1;
@@ -2005,11 +2004,12 @@ void raba(
#ifdef USE_NVTX
  nvtxRangePush("raba inner loop 1");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(+:c1, c2)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(+:c1, c2)
// #else
// #pragma omp parallel for simd reduction(+:c1, c2)
// #endif
#pragma omp parallel for simd reduction(+:c1, c2)
#endif
  for (int j10 = 1; j10 <= nlemt; j10++) {
      int j = j10 - 1;
      c1 += (vec_am0m[i*nlemt+j] * vec_w[4*j]);
@@ -2027,11 +2027,12 @@ void raba(
#ifdef USE_NVTX
  nvtxRangePush("raba outer loop 2");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp teams distribute parallel for
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp teams distribute parallel for
// #else
// #pragma omp parallel for
// #endif
#pragma omp parallel for
#endif
  for (int ipo = 0; ipo < 2; ipo++) {
    int jpo = 1 - ipo;
    ctqce[ipo][0] = cc0;
@@ -2063,11 +2064,12 @@ void raba(
#ifdef USE_NVTX
    nvtxRangePush("raba inner loop 2");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)
// #else
// #pragma omp parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)
// #endif
#pragma omp parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)
#endif
    for (int k = 1; k<=kmax; k++) {
      int l60 = (int) sqrt(k+1);
      int im60 = k - (l60*l60) + 1;
@@ -2140,11 +2142,12 @@ void raba(
#ifdef USE_NVTX
  nvtxRangePush("raba loop 3");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd
// #else
// #pragma omp parallel for simd
// #endif
#pragma omp parallel for simd
#endif
  for (int ipo78 = 1; ipo78 <= 2; ipo78++) {
    int ipo = ipo78 - 1;
    tqce[ipo][0] = real(ctqce[ipo][0] - ctqce[ipo][2]) * sq2i;
@@ -2214,11 +2217,12 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) {
#ifdef USE_NVTX
      nvtxRangePush("scr0 inner loop 1");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(+:sums, sum21)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21)
// #else
// #pragma omp parallel for simd reduction(+:sums, sum21)
// #endif
#pragma omp parallel for simd reduction(+:sums, sum21)
#endif
      for (int l10 = 1; l10 <= c1->li; l10++) {
	double fl = 1.0 * (l10 + l10 + 1);
	// dcomplex rm = 1.0 / c1->rmi[l10 - 1][i14 - 1];
@@ -2262,11 +2266,12 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) {
#ifdef USE_NVTX
  nvtxRangePush("scr0 loop 2");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas)
// #else
// #pragma omp parallel for simd reduction(+:scs, ecs, acs, tfsas)
// #endif
#pragma omp parallel for simd reduction(+:scs, ecs, acs, tfsas)
#endif
  for (int i14 = 1; i14 <= c1->nsph; i14++) {
    int iogi = c1->iog[i14 - 1];
    scs += c1->sscs[iogi - 1];
@@ -2328,11 +2333,12 @@ void scr2(
#ifdef USE_NVTX
      nvtxRangePush("scr2 inner loop 1");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(-:s11, s21, s12, s22)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(-:s11, s21, s12, s22)
// #else
// #pragma omp parallel for simd reduction(-:s11, s21, s12, s22)
// #endif
#pragma omp parallel for simd reduction(-:s11, s21, s12, s22)
#endif
      for (int k = 1; k<=kmax; k++) {
	int l10 = (int) sqrt(k+1);
	int im10 = k - (l10*l10) + 1;
@@ -2384,11 +2390,12 @@ void scr2(
#ifdef USE_NVTX
  nvtxRangePush("scr2 loop 2");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
// #else
// #pragma omp parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
// #endif
#pragma omp parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
#endif
  for (int i14 = 1; i14 <= c1->nsph; i14++) {
    int i = i14 - 1;
    int iogi = c1->iog[i14 - 1];
@@ -2418,11 +2425,12 @@ void scr2(
#ifdef USE_NVTX
      nvtxRangePush("scr2 inner loop 3");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd collapse(4)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd collapse(4)
// #else
// #pragma omp parallel for simd collapse(4)
// #endif
#pragma omp parallel for simd collapse(4)
#endif
      for (int ipo1 = 1; ipo1 <=2; ipo1++) {
	for (int jpo1 = 1; jpo1 <= 2; jpo1++) {
	  for (int ipo2 = 1; ipo2 <= 2; ipo2++) {
@@ -2445,11 +2453,12 @@ void scr2(
#ifdef USE_NVTX
  nvtxRangePush("scr2 loop 4");
#endif
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for collapse(4)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for collapse(4)
// #else
// #pragma omp parallel for collapse(4)
// #endif
#pragma omp parallel for collapse(4)
#endif
  for (int ipo1 = 1; ipo1 <=2; ipo1++) {
    for (int jpo1 = 1; jpo1 <= 2; jpo1++) {
      for (int ipo2 = 1; ipo2 <= 2; ipo2++) {
@@ -2582,11 +2591,12 @@ void ztm(dcomplex **am, ParticleDescriptor *c1) {
  // but if it results im = 0, then we set l = l-1 and im = 2*l+1
  // furthermore if it results im > 2*l+1, then we set
  // im = im -(2*l+1) and l = l+1 (there was a rounding error in a nearly exact root)
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd collapse(3)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd collapse(3)
// #else
// #pragma omp parallel for simd collapse(3)
// #endif
#pragma omp parallel for simd collapse(3)
#endif
  for (int n2 = 1; n2 <= c1->nsph; n2++) { // GPU portable?
    for (int k2 = 1; k2<=k2max; k2++) {
      for (int k3 = 1; k3<=k3max; k3++) {
@@ -2632,11 +2642,12 @@ void ztm(dcomplex **am, ParticleDescriptor *c1) {
#endif
  dcomplex *am_v = am[0];
  dcomplex *sam_v = c1->sam[0];
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd collapse(2)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd collapse(2)
// #else
// #pragma omp parallel for simd collapse(2)
// #endif
#pragma omp parallel for simd collapse(2)
#endif
  for (int i1 = 1; i1 <= c1->ndi; i1++) { // GPU portable?
    for (int i3 = 1; i3 <= c1->nlem; i3++) {
      dcomplex sum1 = cc0;
@@ -2669,11 +2680,12 @@ void ztm(dcomplex **am, ParticleDescriptor *c1) {
      sam_v[vecind1e + i3e - 1] = sum4;
    } // i3 loop
  } // i1 loop
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd collapse(2)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd collapse(2)
// #else
// #pragma omp parallel for simd collapse(2)
// #endif 
#pragma omp parallel for simd collapse(2)
#endif 
  for (int i1 = 1; i1 <= c1->ndi; i1++) {
    for (int i0 = 1; i0 <= c1->nlem; i0++) {
      int vecindex = (i1 - 1) * c1->nlem + i0 - 1;
@@ -2682,11 +2694,12 @@ void ztm(dcomplex **am, ParticleDescriptor *c1) {
    } // i0 loop
  } // i1 loop
  dcomplex *vec_am0m = c1->am0m[0];
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd collapse(2)
#else
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp target teams distribute parallel for simd collapse(2)
// #else
// #pragma omp parallel for simd collapse(2)
// #endif
#pragma omp parallel for simd collapse(2)
#endif
  for (int i0 = 1; i0 <= c1->nlem; i0++) {
    for (int i3 = 1; i3 <= c1->nlemt; i3++) {
      int i0e = i0 + c1->nlem;
+0 −4
Original line number Diff line number Diff line
@@ -53,10 +53,6 @@
#include <omp.h>
#endif

#ifdef USE_TARGET_OFFLOAD
#pragma omp requires unified_shared_memory
#endif

void camp(dcomplex *ac, dcomplex **am0m, dcomplex *ws, CIL *cil) {
  for (int j = 0; j < cil->nlemt; j++) {
    for (int i = 0; i < cil->nlemt; i++) {
Loading