Commit fbf95ca0 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Move C9 data structure under ParticleDescriptor

parent fb3f98b5
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -862,7 +862,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
#ifdef USE_NVTX
  nvtxRangePush("Average calculation");
#endif
  ztm(cid->am, cid->c1, cid->c9);
  ztm(cid->am, cid->c1);
#ifdef DEBUG_AM
  VirtualAsciiFile *outam3 = new VirtualAsciiFile();
  string outam3_name = output_path + "/c_AM3_JXI" + to_string(jxi488) + ".txt";
+30 −62
Original line number Diff line number Diff line
@@ -38,65 +38,8 @@
#endif

class ParticleDescriptor;
class mixMPI;

/*! \brief Representation of the FORTRAN C9 blocks.
 */
class C9 {
protected:
  //! \brief Number of rows in the GIS and GLS matrices
  int gis_size_0;
  //! \brief Number of rows in the SAM matrix
  int sam_size_0;

public:
  //! \brief NLEM = LE * (LE + 2)
  int nlem;
  //! \brief NLEMT = 2 * LE * (LE + 2)
  int nlemt;
  //! \brief QUESTION: definition?
  dcomplex **gis;
  //! \brief QUESTION: definition?
  dcomplex **gls;
  //! \brief QUESTION: definition?
  dcomplex **sam;

  /*! \brief C9 instance constructor.
   *
   * \param ndi: `int` NDI = NSPH * LI * (LI + 2)
   * \param nlem: `int` NLEM = LE * (LE + 2)
   * \param ndit: `int` NDIT = 2 * NSPH * LI * (LI + 2)
   * \param nlemt: `int` NLEMT = 2 * LE * (LE + 2)
   */
  C9(int ndi, int nlem, int ndit, int nlemt);

  /*! \brief C9 instance constructor copying contents from preexisting object.
   *
   * \param rhs: `C9` preexisting object to copy from
   */
  C9(const C9& rhs);

  /*! \brief C9 instance destroyer.
   */
  ~C9();

#ifdef MPI_VERSION
  /*! \brief C9 instance constructor copying all contents off MPI broadcast from MPI process 0
   *
   * \param mpidata: `mixMPI *` pointer to MPI data structure.
   */
  C9(const mixMPI *mpidata);

  /*! \brief send C9 instance from MPI process 0 via MPI broadcasts to all other processes
   *
   * \param mpidata: `mixMPI *` pointer to MPI data structure.
   */
  void mpibcast(const mixMPI *mpidata);
#endif

};

/*! \brief structure with essential MPI data.
/*! \brief Structure with essential MPI data.
 */
class mixMPI {
public:
@@ -133,8 +76,6 @@ class ClusterIterationData {
public:
  //! \brief Pointer to a ParticleDescriptor structure.
  ParticleDescriptor *c1;
  //! \brief Pointer to a C9 structure.
  C9 *c9;
  //! \brief Vector of geometric asymmetry factors.
  double *gaps;
  double **tqse;
@@ -264,7 +205,14 @@ protected:
  // >>> END OF SECTION NEEDED BY SPHERE AND CLUSTER <<< //

  // >>> NEEDED BY CLUSTER <<< //
  //! \brief Contiguous space for TSAS.
  dcomplex *vec_tsas;
  //! \brief Contiguous space for GIS.
  dcomplex *vec_gis;
  //! \brief Contiguous space for GLS.
  dcomplex *vec_gls;
  //! \brief Contiguous space for SAM.
  dcomplex *vec_sam;
  // >>> END OF SECTION NEEDED BY CLUSTER <<< //
  
  // >>> NEEDED BY CLUSTER AND INCLU <<<
@@ -272,9 +220,9 @@ protected:
  int _le;
  //! \brief Maximum field expansion order.
  int _lm;
  //! \brief NLIM = NSPH * LI * (LI + 2)
  //! \brief NLIM = LI * (LI + 2)
  int _nlim;
  //! \brief NLEM = NSPH * LE * (LE + 2)
  //! \brief NLEM = LE * (LE + 2)
  int _nlem;
  //! \brief NLEMT = 2 * NLEM
  int _nlemt;
@@ -292,6 +240,10 @@ protected:
  int _lmtpos;
  //! \brief NV3J = (LM * (LM + 1) * (2 * LM + 7)) / 6
  int _nv3j;
  //! \brief NDI = NSPH * NLIM
  int _ndi;
  //! \brief NDIT = 2 * NSPH * NLIM
  int _ndit;
  //! \brief Contiguous space for AM0M
  dcomplex *vec_am0m;
  //! \brief Contiguous space for FSAC
@@ -304,6 +256,10 @@ protected:
  int *vec_ind3j;
  // >>> END OF SECTION NEEDED BY CLUSTER AND INCLU <<< //

  // >>> NEEDED BY INCLU <<< //
  //! \brief NDM = NDIT + NLEMT
  int _ndm;
  // >>> END OF SECTION NEEDED BY INCLU <<< //
public:
  // >>> COMMON TO ALL DESCRIPTOR TYPES <<< //
  //! \brief Base sub-class identification code.
@@ -405,6 +361,12 @@ public:
  double ecs;
  //! \brief Total absorption cross-section.
  double acs;
  //! \brief TBD.
  dcomplex **gis;
  //! \brief TBD.
  dcomplex **gls;
  //! \brief TBD.
  dcomplex **sam;
  // >>> END OF SECTION NEEDED BY CLUSTER <<< //
  
  // >>> NEEDED BY CLUSTER AND INCLU <<<
@@ -432,6 +394,12 @@ public:
  const int& lmtpos = _lmtpos;
  //! \brief Read-only view of NV3J.
  const int& nv3j = _nv3j;
  //! \brief Read-only view of NDI.
  const int& ndi = _ndi;
  //! \brief Read-only view of NDIT.
  const int& ndit = _ndit;
  //! \brief Read-only view of NDM.
  const int& ndm = _ndm;

  //! \brief TBD
  dcomplex *vh;
+1 −2
Original line number Diff line number Diff line
@@ -404,8 +404,7 @@ void tqr(
 *
 * \param am: `complex double **`
 * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance.
 * \param c9: `C9 *` Pointer to a C9 instance.
 */
void ztm(dcomplex **am, ParticleDescriptor *c1, C9 * c9);
void ztm(dcomplex **am, ParticleDescriptor *c1);

#endif
+74 −92
Original line number Diff line number Diff line
@@ -36,93 +36,6 @@
#include <mpi.h>
#endif

C9::C9(int ndi, int _nlem, int ndit, int _nlemt) {
  gis_size_0 = ndi;
  sam_size_0 = ndit;
  nlem = _nlem;
  nlemt = _nlemt;
  gis = new dcomplex*[ndi];
  gls = new dcomplex*[ndi];
  // allocate these arrays to be contiguous, C-style
  gis[0] = new dcomplex[ndi*nlem]();
  gls[0]  = new dcomplex[ndi*nlem]();  
  for (int gi = 1; gi < ndi; gi++) {
    gis[gi] = gis[0] + gi*nlem;
    gls[gi] = gls[0] + gi*nlem;
  }
  sam = new dcomplex*[ndit];
  sam[0] = new dcomplex[ndit*nlemt]();
  for (int si = 1; si < ndit; si++) sam[si] = sam[0] + si*nlemt;
}

C9::C9(const C9& rhs) {
  gis_size_0 = rhs.gis_size_0;
  sam_size_0 = rhs.sam_size_0;
  nlem = rhs.nlem;
  nlemt = rhs.nlemt;
  gis = new dcomplex*[gis_size_0];
  gls = new dcomplex*[gis_size_0];
  // allocate these arrays to be contiguous, C-style
  gis[0] = new dcomplex[gis_size_0*nlem]();
  gls[0]  = new dcomplex[gis_size_0*nlem]();  
  memcpy(gis[0], rhs.gis[0], gis_size_0*nlem*sizeof(dcomplex));
  memcpy(gls[0], rhs.gls[0], gis_size_0*nlem*sizeof(dcomplex));
  for (int gi = 1; gi < gis_size_0; gi++) {
    gis[gi] = gis[0] + gi*nlem;
    gls[gi] = gls[0] + gi*nlem;
  }
  sam = new dcomplex*[sam_size_0];
  sam[0] = new dcomplex[sam_size_0*nlemt]();
  memcpy(sam[0], rhs.sam[0], sam_size_0*nlemt*sizeof(dcomplex));
  for (int si = 1; si < sam_size_0; si++) {
    sam[si] = sam[0]+si*nlemt;
  }
}

C9::~C9() {
  delete[] gis[0];
  delete[] gls[0];
  delete[] gis;
  delete[] gls;
  delete[] sam[0];
  delete[] sam;
}

#ifdef MPI_VERSION
C9::C9(const mixMPI *mpidata) {
  MPI_Bcast(&gis_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&sam_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nlem, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
  gis = new dcomplex*[gis_size_0];
  gls = new dcomplex*[gis_size_0];
  gis[0] = new dcomplex[gis_size_0*nlem]();
  gls[0]  = new dcomplex[gis_size_0*nlem]();  
  MPI_Bcast(gis[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  MPI_Bcast(gls[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  for (int gi = 1; gi < gis_size_0; gi++) {
    gis[gi] = gis[0] + gi*nlem;
    gls[gi] = gls[0] + gi*nlem;
  }
  sam = new dcomplex*[sam_size_0];
  sam[0] = new dcomplex[sam_size_0*nlemt]();
  MPI_Bcast(sam[0], sam_size_0*nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  for (int si = 1; si < sam_size_0; si++) {
    sam[si] = sam[0]+si*nlemt;
  }
}

void C9::mpibcast(const mixMPI *mpidata) {
  MPI_Bcast(&gis_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&sam_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nlem, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(gis[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  MPI_Bcast(gls[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  MPI_Bcast(sam[0], sam_size_0*nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
}
#endif

mixMPI::mixMPI() {
  mpirunning = 0;
  rank = 0;
@@ -152,7 +65,6 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter
  c1 = new ParticleDescriptorCluster(gconf, sconf);
  const int ndi = c1->nsph * c1->nlim;
  const np_int ndit = 2 * ndi;
  c9 = new C9(ndi, c1->nlem, 2 * ndi, 2 * c1->nlem);
  gaps = new double[c1->nsph]();
  tqev = new double[3]();
  tqsv = new double[3]();
@@ -254,7 +166,6 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
  c1 = new ParticleDescriptorCluster(reinterpret_cast<ParticleDescriptorCluster &>(*(rhs.c1)));
  const int ndi = c1->nsph * c1->nlim;
  const np_int ndit = 2 * ndi;
  c9 = new C9(*(rhs.c9));
  gaps = new double[c1->nsph]();
  for (int gi = 0; gi < c1->nsph; gi++) gaps[gi] = rhs.gaps[gi];
  tqev = new double[3]();
@@ -400,7 +311,6 @@ ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int devi
  c1 = new ParticleDescriptorCluster(mpidata);
  const int ndi = c1->nsph * c1->nlim;
  const np_int ndit = 2 * ndi;
  c9 = new C9(mpidata);
  gaps = new double[c1->nsph]();
  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  tqev = new double[3]();
@@ -534,7 +444,6 @@ void ClusterIterationData::mpibcast(const mixMPI *mpidata) {
  c1->mpibcast(mpidata);
  const int ndi = c1->nsph * c1->nlim;
  const np_int ndit = 2 * ndi;
  c9->mpibcast(mpidata);
  MPI_Bcast(gaps, c1->nsph, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Bcast(tqev, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Bcast(tqsv, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
@@ -615,7 +524,6 @@ ClusterIterationData::~ClusterIterationData() {
  }
  delete[] zpv;
  delete c1;
  delete c9;
  delete[] gaps;
  for (int ti = 1; ti > -1; ti--) {
    delete[] tqse[ti];
@@ -690,6 +598,9 @@ ParticleDescriptor::ParticleDescriptor(GeometryConfiguration *gconf, ScattererCo
  _lmtpo = 0;
  _lmtpos = 0;
  _nv3j = 0;
  _ndi = 0;
  _ndit = 0;
  _ndm = 0;
  _num_configurations = sconf->configurations;
  _num_layers = (sconf->use_external_sphere) ? 1 : 0;
  for (int nli = 0; nli < num_configurations; nli++) _num_layers += sconf->get_nshl(nli);
@@ -821,6 +732,9 @@ ParticleDescriptor::ParticleDescriptor(const ParticleDescriptor &rhs) {
  _lmtpo = rhs._lmtpo;
  _lmtpos = rhs._lmtpos;
  _nv3j = rhs._nv3j;
  _ndi = rhs._ndi;
  _ndit = rhs._ndit;
  _ndm = rhs._ndm;
  _num_configurations = rhs._num_configurations;
  _num_layers = rhs._num_layers;

@@ -904,11 +818,15 @@ ParticleDescriptor::ParticleDescriptor(const ParticleDescriptor &rhs) {
  // >>> NEEDED BY CLUSTER <<<
  vintt = NULL;
  tfsas = 0.0 + I * 0.0;
  vec_tsas = NULL;
  tsas = NULL;
  gcs = 0.0;
  scs = 0.0;
  ecs = 0.0;
  acs = 0.0;
  vec_gis = NULL;
  vec_gls = NULL;
  vec_sam = NULL;
  // >>> NEEDED BY CLUSTER AND INCLU <<<
  vec_am0m = NULL;
  vec_fsac = NULL;
@@ -964,6 +882,9 @@ ParticleDescriptor::ParticleDescriptor(const mixMPI *mpidata) {
  MPI_Bcast(&_lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_ndi, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_ndit, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_ndm, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_num_configurations, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_num_layers, 1, MPI_INT, 0, MPI_COMM_WORLD);

@@ -1039,6 +960,9 @@ void ParticleDescriptor::mpibcast(const mixMPI *mpidata) {
  MPI_Bcast(&_lmtpo, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_lmtpos, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_nv3j, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_ndi, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_ndit, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_ndm, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_num_configurations, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&_num_layers, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(vec_rmi, _nsph * _li, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
@@ -1085,6 +1009,9 @@ void ParticleDescriptor::mpibcast(const mixMPI *mpidata) {
    MPI_Bcast(&scs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(&ecs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(&acs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(vec_gis, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
    MPI_Bcast(vec_gls, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
    MPI_Bcast(vec_sam, _ndit * _nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  }
  // >>> NEEDED BY CLUSTER AND INCLU <<< //
  if (_class_type == CLUSTER_TYPE || _class_type == INCLUSION_TYPE) {
@@ -1211,6 +1138,12 @@ ParticleDescriptor::~ParticleDescriptor() {
    delete[] vintt;
    delete[] vec_tsas;
    delete[] tsas;
    delete[] vec_gis;
    delete[] vec_gls;
    delete[] vec_sam;
    delete[] gis;
    delete[] gls;
    delete[] sam;
  }
}

@@ -1260,6 +1193,8 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(GeometryConfiguration *gcon
  _lmtpo = _li + _le + 1;
  _lmtpos = _lmtpo * _lmtpo;
  _nv3j = (_lm * (_lm + 1) * (2 * _lm + 7)) / 6;
  _ndi = _nsph * _nlim;
  _ndit = 2 * _nsph * _nlim;
  vec_am0m = new dcomplex[_nlemt * _nlemt]();
  vec_fsac = new dcomplex[4]();
  vec_sac = new dcomplex[4]();
@@ -1293,6 +1228,19 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(GeometryConfiguration *gcon
  ind3j = new int*[_lm + 1];
  for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii);
  rac3j = new double[_lmtpo]();

  // Needed by CLUSTER
  vec_gis = new dcomplex[_ndi * _nlem]();
  gis = new dcomplex*[_ndi];
  vec_gls = new dcomplex[_ndi * _nlem]();
  gls = new dcomplex*[_ndi];
  for (int gi = 0; gi < _ndi; gi++) {
    gis[gi] = vec_gis + (gi * _nlem);
    gls[gi] = vec_gls + (gi * _nlem);
  }
  vec_sam = new dcomplex[_ndit * _nlemt]();
  sam = new dcomplex*[_ndit];
  for (int si = 0; si < _ndit; si++) sam[si] = vec_sam + (si * _nlemt);
}

ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorCluster &rhs) : ParticleDescriptor(rhs) {
@@ -1401,6 +1349,24 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorClu
  for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii);
  rac3j = new double[_lmtpo];
  for (int ri = 0; ri < _lmtpo; ri++) rac3j[ri] = rhs.rac3j[ri];

  // Needed by CLUSTER
  vec_gis = new dcomplex[_ndi * _nlem];
  vec_gls = new dcomplex[_ndi * _nlem];
  for (int vi = 0; vi < _ndi * _nlem; vi++) {
    vec_gis[vi] = rhs.vec_gis[vi];
    vec_gls[vi] = rhs.vec_gls[vi];
  }
  gis = new dcomplex*[_ndi];
  gls = new dcomplex*[_ndi];
  for (int gi = 0; gi < _ndi; gi++) {
    gis[gi] = vec_gis + (gi * _nlem);
    gls[gi] = vec_gls + (gi * _nlem);
  }
  vec_sam = new dcomplex[_ndit * _nlemt];
  for (int vi = 0; vi < _ndit * _nlemt; vi++) vec_sam[vi] = rhs.vec_sam[vi];
  sam = new dcomplex*[_ndit];
  for (int si = 0; si < _ndit; si++) sam[si] = vec_sam + (si * _nlemt);
}

#ifdef MPI_VERSION
@@ -1448,6 +1414,22 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const mixMPI *mpidata) : Pa
  MPI_Bcast(&scs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Bcast(&ecs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Bcast(&acs, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  vec_gis = new dcomplex[_ndi * _nlem];
  MPI_Bcast(vec_gis, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  vec_gls = new dcomplex[_ndi * _nlem];
  MPI_Bcast(vec_gls, _ndi * _nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  vec_sam = new dcomplex[_ndit * _nlemt];
  MPI_Bcast(vec_sam, _ndit * _nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  gis = new dcomplex*[_ndi];
  gls = new dcomplex*[_ndi];
  for (int gi = 0; gi < _ndi; gi++) {
    gis[gi] = vec_gis + (gi * _nlem);
    gls[gi] = vec_gls + (gi * _nlem);
  }
  sam = new dcomplex*[_ndit];
  for (int si = 0; si < _ndit; si++) {
    sam[si] = vec_sam + (si * _nlemt);
  }
  // >>> NEEDED BY CLUSTER AND INCLU <<<
  vec_am0m = new dcomplex[_nlemt * _nlemt];
  int nlemts = _nlemt * _nlemt;
+24 −29
Original line number Diff line number Diff line
@@ -2533,11 +2533,9 @@ void tqr(
  tsk = u[0] * tqsv[0] + u[1] * tqsv[1] + u[2] * tqsv[2];
}

void ztm(dcomplex **am, ParticleDescriptor *c1, C9 * c9) {
void ztm(dcomplex **am, ParticleDescriptor *c1) {
  dcomplex gie, gle, a1, a2, a3, a4, sum1, sum2, sum3, sum4;
  const dcomplex cc0 = 0.0 + 0.0 * I;
  const np_int ndi = c1->nsph * c1->nlim;
  np_int ndit = 2 * ndi;
  // int i2 = 0; // old implementation
#ifdef USE_NVTX
  nvtxRangePush("ZTM starts");
@@ -2546,8 +2544,8 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C9 * c9) {
  nvtxRangePush("ZTM parallel loop 1");
#endif
  // C9 *c9_para = new C9(*c9);
  dcomplex *gis_v = c9->gis[0];
  dcomplex *gls_v = c9->gls[0];
  dcomplex *gis_v = c1->gis[0];
  dcomplex *gls_v = c1->gls[0];
  double *rac3j_local = (double *) malloc(c1->lmtpo*sizeof(double));
  int k2max = c1->li*(c1->li+2);
  int k3max = c1->le*(c1->le+2);
@@ -2591,7 +2589,7 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C9 * c9) {
	int m2 = -l2 - 1 + im2;
	int i3 = l3 * l3 + im3 - 1;
	int m3 = -l3 - 1 + im3;
	int vecindex = (i2 - 1)*c9->nlem + i3 - 1;
	int vecindex = (i2 - 1) * c1->nlem + i3 - 1;
	gis_v[vecindex] = ghit_d(2, 0, n2, l2, m2, l3, m3, c1, rac3j_local);
	gls_v[vecindex] = ghit_d(2, 1, n2, l2, m2, l3, m3, c1, rac3j_local);
      } // close k3 loop, former l3 + im3 loops
@@ -2605,26 +2603,26 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C9 * c9) {
  nvtxRangePush("ZTM loop 2");
#endif
  dcomplex *am_v = am[0];
  dcomplex *sam_v = c9->sam[0];
  dcomplex *sam_v = c1->sam[0];
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd collapse(2)
#endif
  for (int i1 = 1; i1 <= ndi; i1++) { // GPU portable?
  for (int i1 = 1; i1 <= c1->ndi; i1++) { // GPU portable?
    for (int i3 = 1; i3 <= c1->nlem; i3++) {
      dcomplex sum1 = cc0;
      dcomplex sum2 = cc0;
      dcomplex sum3 = cc0;
      dcomplex sum4 = cc0;
      int i1e = i1 + ndi;
      int i1e = i1 + c1->ndi;
      int i3e = i3 + c1->nlem;
#pragma parallel for simd reduction(+:sum1,sum2,sum3,sum4)
      for (int i2 = 1; i2 <= ndi; i2++) {
	int i2e = i2 + ndi;
	int vecindg_23 = (i2 - 1)*c9->nlem + i3 - 1;
      for (int i2 = 1; i2 <= c1->ndi; i2++) {
	int i2e = i2 + c1->ndi;
	int vecindg_23 = (i2 - 1) * c1->nlem + i3 - 1;
	dcomplex gie = gis_v[vecindg_23];
	dcomplex gle = gls_v[vecindg_23];
	np_int vecinda_1 = (i1 - 1)*ndit;
	np_int vecinda_1e = (i1 - 1 + ndi)*ndit;
	np_int vecinda_1 = (i1 - 1) * c1->ndit;
	np_int vecinda_1e = (i1 - 1 + c1->ndi) * c1->ndit;
	dcomplex a1 = am_v[vecinda_1 + i2 - 1];
	dcomplex a2 = am_v[vecinda_1 + i2e - 1];
	dcomplex a3 = am_v[vecinda_1e + i2 - 1];
@@ -2634,8 +2632,8 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C9 * c9) {
	sum3 += (a3 * gie + a4 * gle);
	sum4 += (a3 * gle + a4 * gie);
      } // i2 loop
      int vecind1 = (i1 - 1)*c9->nlemt;
      int vecind1e = (i1e - 1)*c9->nlemt;
      int vecind1 = (i1 - 1) * c1->nlemt;
      int vecind1e = (i1e - 1) * c1->nlemt;
      sam_v[vecind1 + i3 - 1] = sum1;
      sam_v[vecind1 + i3e - 1] = sum2;
      sam_v[vecind1e + i3 - 1] = sum3;
@@ -2643,39 +2641,36 @@ void ztm(dcomplex **am, ParticleDescriptor *c1, C9 * c9) {
    } // i3 loop
  } // i1 loop
#pragma omp parallel for collapse(2)
  for (int i1 = 1; i1 <= ndi; i1++) {
  for (int i1 = 1; i1 <= c1->ndi; i1++) {
    for (int i0 = 1; i0 <= c1->nlem; i0++) {
      int vecindex = (i1 - 1)*c9->nlem + i0 - 1;
      int vecindex = (i1 - 1) * c1->nlem + i0 - 1;
      gis_v[vecindex] = dconjg(gis_v[vecindex]);
      gls_v[vecindex] = dconjg(gls_v[vecindex]);
      // c9->gis[i1 - 1][i0 - 1] = dconjg(c9->gis[i1 - 1][i0 - 1]);
      // c9->gls[i1 - 1][i0 - 1] = dconjg(c9->gls[i1 - 1][i0 - 1]);
    } // i0 loop
  } // i1 loop
  int nlemt = c1->nlem + c1->nlem;
  dcomplex *vec_am0m = c1->am0m[0];
#ifdef USE_TARGET_OFFLOAD
#pragma omp target parallel for collapse(2)
#endif
  for (int i0 = 1; i0 <= c1->nlem; i0++) {
    for (int i3 = 1; i3 <= nlemt; i3++) {
    for (int i3 = 1; i3 <= c1->nlemt; i3++) {
      int i0e = i0 + c1->nlem;
      dcomplex sum1 = cc0;
      dcomplex sum2 = cc0;
      for (int i1 = 1; i1 <= ndi; i1 ++) {
	int i1e = i1 + ndi;
	int vecind1 = (i1 - 1)*c9->nlemt;
	int vecind1e = (i1e - 1)*c9->nlemt;
      for (int i1 = 1; i1 <= c1->ndi; i1 ++) {
	int i1e = i1 + c1->ndi;
	int vecind1 = (i1 - 1) * c1->nlemt;
	int vecind1e = (i1e - 1) * c1->nlemt;
	a1 = sam_v[vecind1 + i3 - 1];
	a2 = sam_v[vecind1e + i3 - 1];
	int vecindex = (i1 - 1)*c9->nlem + i0 - 1;
	int vecindex = (i1 - 1) * c1->nlem + i0 - 1;
	gie = gis_v[vecindex];
	gle = gls_v[vecindex];
	sum1 += (a1 * gie + a2 * gle);
	sum2 += (a1 * gle + a2 * gie);
      } // i1 loop
      int vecind0 = (i0 - 1)*nlemt;
      int vecind0e = (i0e - 1)*nlemt;
      int vecind0 = (i0 - 1) * c1->nlemt;
      int vecind0e = (i0e - 1) * c1->nlemt;
      vec_am0m[vecind0 + i3 - 1] = -sum1;
      vec_am0m[vecind0e + i3 - 1] = -sum2;
      // c1->am0m[i0 - 1][i3 - 1] = -sum1;