Commit f1b1bf1a authored by Mulas, Giacomo's avatar Mulas, Giacomo
Browse files

Make parallel with OMP and, possibly, offloading, ztm() and scr2()

parent 9e49e770
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -55,8 +55,6 @@ class mixMPI;
 */
class C1 {
protected:
  //! \brief Number of spheres.
  int nsph;
  //! \brief Maximum order of field expansion.
  int lm;
  //! \brief Contiguous RMI vector.
@@ -69,6 +67,8 @@ protected:
  dcomplex *vec_vints;
  
public:
  //! \brief Number of spheres.
  int nsph;
  //! \brief NLMMT = 2 * LM * (LM + 2).
  int nlmmt;
  //! \brief Number of configurations
@@ -453,6 +453,7 @@ public:

};


/*! \brief Representation of the FORTRAN C9 blocks.
 */
class C9 {
@@ -461,12 +462,12 @@ protected:
  int gis_size_0;
  //! \brief Number of rows in the SAM matrix
  int sam_size_0;

public:
  //! \brief QUESTION: definition?
  int nlem;
  //! \brief QUESTION: definition?
  int nlemt;

public:
  //! \brief QUESTION: definition?
  dcomplex **gis;
  //! \brief QUESTION: definition?
+35 −0
Original line number Diff line number Diff line
@@ -119,6 +119,28 @@ void cms(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6);
 */
void crsm1(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6);

/*! \brief Compute the transfer vector from N2 to N1.
 *
 * This function computes the transfer vector going from N2 to N1, using either
 * Hankel, Bessel or Bessel from origin functions.
 *
 * \param ihi: `int`
 * \param ipamo: `int`
 * \param nbl: `int`
 * \param l1: `int`
 * \param m1: `int`
 * \param l2: `int`
 * \param m2: `int`
 * \param c1: `C1 *`
 * \param c1ao: `C1_AddOns *`
 * \param c4: `C4 *`
 * \param rac3j: `double *`
 */
dcomplex ghit_d(
	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, C1 *c1,
	      C1_AddOns *c1ao, C4 *c4, double *rac3j
);

/*! \brief Compute the transfer vector from N2 to N1.
 *
 * This function computes the transfer vector going from N2 to N1, using either
@@ -257,6 +279,19 @@ void r3j000(int j2, int j3, C6 *c6);
 */
void r3jjr(int j2, int j3, int m2, int m3, C6 *c6);

/*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JJ transitions.
 *
 * This function calculates the 3j(J,J2,J3;-M2-M3,M2,M3) symbol for the Clebsch-Gordan
 * coefficients. See Appendix a.3.1 in Borghese, Denti & Saija (2007).
 *
 * \param j2: `int`
 * \param j3: `int`
 * \param m2: `int`
 * \param m3: `int`
 * \param rac3j: `double *`
 */
void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j);

/*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JM transitions.
 *
 * This function calculates the 3j(J,J2,J3;M1,M,-M1-M) symbol for the Clebsch-Gordan
+37 −34
Original line number Diff line number Diff line
@@ -837,12 +837,16 @@ C9::C9(int ndi, int _nlem, int ndit, int _nlemt) {
  nlemt = _nlemt;
  gis = new dcomplex*[ndi];
  gls = new dcomplex*[ndi];
  for (int gi = 0; gi < ndi; gi++) {
    gis[gi] = new dcomplex[nlem]();
    gls[gi] = new dcomplex[nlem]();
  // allocate these arrays to be contiguous, C-style
  gis[0] = new dcomplex[ndi*nlem]();
  gls[0]  = new dcomplex[ndi*nlem]();  
  for (int gi = 1; gi < ndi; gi++) {
    gis[gi] = gis[0] + gi*nlem;
    gls[gi] = gls[0] + gi*nlem;
  }
  sam = new dcomplex*[ndit];
  for (int si = 0; si < ndit; si++) sam[si] = new dcomplex[nlemt]();
  sam[0] = new dcomplex[ndit*nlemt]();
  for (int si = 1; si < ndit; si++) sam[si] = sam[0] + si*nlemt;
}

C9::C9(const C9& rhs) {
@@ -852,29 +856,29 @@ C9::C9(const C9& rhs) {
  nlemt = rhs.nlemt;
  gis = new dcomplex*[gis_size_0];
  gls = new dcomplex*[gis_size_0];
  for (int gi = 0; gi < gis_size_0; gi++) {
    gis[gi] = new dcomplex[nlem]();
    gls[gi] = new dcomplex[nlem]();
    for (int gj=0; gj<nlem; gj++) {
      gis[gi][gj] = rhs.gis[gi][gj];
      gls[gi][gj] = rhs.gls[gi][gj];
    }
  // allocate these arrays to be contiguous, C-style
  gis[0] = new dcomplex[gis_size_0*nlem]();
  gls[0]  = new dcomplex[gis_size_0*nlem]();  
  memcpy(gis[0], rhs.gis[0], gis_size_0*nlem*sizeof(dcomplex));
  memcpy(gls[0], rhs.gls[0], gis_size_0*nlem*sizeof(dcomplex));
  for (int gi = 1; gi < gis_size_0; gi++) {
    gis[gi] = gis[0] + gi*nlem;
    gls[gi] = gls[0] + gi*nlem;
  }
  sam = new dcomplex*[sam_size_0];
  for (int si = 0; si < sam_size_0; si++) {
    sam[si] = new dcomplex[nlemt]();
    for (int sj=0; sj<nlemt; sj++) sam[si][sj] = rhs.sam[si][sj];
  sam[0] = new dcomplex[sam_size_0*nlemt]();
  memcpy(sam[0], rhs.sam[0], sam_size_0*nlemt*sizeof(dcomplex));
  for (int si = 1; si < sam_size_0; si++) {
    sam[si] = sam[0]+si*nlemt;
  }
}

C9::~C9() {
  for (int gi = gis_size_0 - 1; gi > -1; gi--) {
    delete[] gis[gi];
    delete[] gls[gi];
  }
  delete[] gis[0];
  delete[] gls[0];
  delete[] gis;
  delete[] gls;
  for (int si = sam_size_0 - 1; si > -1; si--) delete[] sam[si];
  delete[] sam[0];
  delete[] sam;
}

@@ -886,16 +890,19 @@ C9::C9(const mixMPI *mpidata) {
  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
  gis = new dcomplex*[gis_size_0];
  gls = new dcomplex*[gis_size_0];
  for (int gi = 0; gi < gis_size_0; gi++) {
    gis[gi] = new dcomplex[nlem]();
    gls[gi] = new dcomplex[nlem]();
    MPI_Bcast(gis[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
    MPI_Bcast(gls[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  gis[0] = new dcomplex[gis_size_0*nlem]();
  gls[0]  = new dcomplex[gis_size_0*nlem]();  
  MPI_Bcast(gis[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  MPI_Bcast(gls[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  for (int gi = 1; gi < gis_size_0; gi++) {
    gis[gi] = gis[0] + gi*nlem;
    gls[gi] = gls[0] + gi*nlem;
  }
  sam = new dcomplex*[sam_size_0];
  for (int si = 0; si < sam_size_0; si++) {
    sam[si] = new dcomplex[nlemt]();
    MPI_Bcast(sam[si], nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  sam[0] = new dcomplex[sam_size_0*nlemt]();
  MPI_Bcast(sam[0], sam_size_0*nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  for (int si = 1; si < sam_size_0; si++) {
    sam[si] = sam[0]+si*nlemt;
  }
}

@@ -904,13 +911,9 @@ void C9::mpibcast(const mixMPI *mpidata) {
  MPI_Bcast(&sam_size_0, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nlem, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nlemt, 1, MPI_INT, 0, MPI_COMM_WORLD);
  for (int gi = 0; gi < gis_size_0; gi++) {
    MPI_Bcast(gis[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
    MPI_Bcast(gls[gi], nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  }
  for (int si = 0; si < sam_size_0; si++) {
    MPI_Bcast(sam[si], nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  }
  MPI_Bcast(gis[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  MPI_Bcast(gls[0], gis_size_0*nlem, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
  MPI_Bcast(sam[0], sam_size_0*nlemt, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD);
}
#endif

+554 −74

File changed.

Preview size limit exceeded, changes collapsed.

+2 −0
Original line number Diff line number Diff line
@@ -196,11 +196,13 @@ double cg1(int lmpml, int mu, int l, int m) {
  return result;
}

# pragma omp begin declare target device_type(any)
dcomplex dconjg(dcomplex z) {
  double zreal = real(z);
  double zimag = imag(z);
  return (zreal - zimag * I);
}
# pragma omp end declare target

void diel(int npntmo, int ns, int i, int ic, double vk, C1 *c1, C2 *c2) {
  const double dif = c1->rc[i - 1][ns] - c1->rc[i - 1][ns - 1];
Loading