Commit 0721edef authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Use thread-private rac3j vector spaces in ghit(), cms_gpu() and ztm()

parent dacf2315
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -139,11 +139,12 @@ void crsm1(double vk, double exri, ParticleDescriptor *c1);
 * \param m1: `int`
 * \param l2: `int`
 * \param m2: `int`
 * \param c1: `ParticleDescriptor *` Poiunter to a ParticleDescriptor instance.
 * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance.
 * \param rac3j: `double *` J connection vector.
 */
dcomplex ghit(
	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2,
	      ParticleDescriptor *c1
  int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, ParticleDescriptor *c1,
  double *rac3j
);

/*! \brief Compute Hankel funtion and Bessel functions.
+14 −12
Original line number Diff line number Diff line
@@ -446,6 +446,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1) {
  const int li = c1->li;
  const int ndi = nsph * nlim;
  const int nsphmo = nsph - 1;
  double *rac3j = c1->rac3j;

  // int nbl = 0;
  for (int n1 = 1; n1 <= nsphmo; n1++) {
@@ -481,8 +482,8 @@ void cms(dcomplex **am, ParticleDescriptor *c1) {
	      int i2e = in2 + ilm2e;
	      int j2 = in1 + ilm2;
	      int j2e = in1 + ilm2e;
	      dcomplex cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1);
	      dcomplex cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1);
	      dcomplex cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1, rac3j);
	      dcomplex cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1, rac3j);
	      am[i1 - 1][i2 - 1] = cgh;
	      am[i1 - 1][i2e - 1] = cgk;
	      am[i1e - 1][i2 - 1] = cgk;
@@ -600,11 +601,13 @@ void cms_gpu(dcomplex **am, ParticleDescriptor *c1) {
    int j2e = (is_valid_iter) ? in1 + ilm2e : 0;
    // End of index 2 magnetic quantum numbers
    dcomplex cgh, cgk;
    double *rac3j = new double[c1->lmtpo];
    memcpy(rac3j, c1->rac3j, c1->lmtpo * sizeof(double));
    cgh = (is_valid_iter) ?
      ghit(0, 0, nbl, l1, m1, l2, m2, c1) :
      ghit(0, 0, nbl, l1, m1, l2, m2, c1, rac3j) :
      cc0;
    cgk = (is_valid_iter) ?
      ghit(0, 1, nbl, l1, m1, l2, m2, c1) :
      ghit(0, 1, nbl, l1, m1, l2, m2, c1, rac3j) :
      cc0;
    if (is_valid_iter) {
      am[i1 - 1][i2 - 1] = cgh;
@@ -616,6 +619,7 @@ void cms_gpu(dcomplex **am, ParticleDescriptor *c1) {
      am[j1e - 1][j2 - 1] = cgk * rsk;
      am[j1e - 1][j2e - 1] = cgh * rsh;
    }
    delete[] rac3j;
  }

  delete[] lut_n1;
@@ -772,8 +776,8 @@ void crsm1(double vk, double exri, ParticleDescriptor *c1) {
}

dcomplex ghit(
	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2,
	      ParticleDescriptor *c1
  int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, ParticleDescriptor *c1,
  double *rac3j
) {
  /* NBL identifies transfer vector going from N2 to N1;
   * IHI=0 for Hankel, IHI=1 for Bessel, IHI=2 for Bessel from origin;
@@ -800,9 +804,6 @@ dcomplex ghit(
  int lmaxpo = l2 + l1mp + 1;
  int i3j0in = c1->ind3j[l1mp][l2 - 1];
  int ilin = -1;
  const int lmtpo = c1->lmtpo;
  double *rac3j = new double[lmtpo];
  memcpy(rac3j, c1->rac3j, lmtpo * sizeof(double));
  if (m1mm2m > lminpo && (m1mm2m - lminpo) % 2 != 0) ilin = 0;
  int isn = 1;
  if (m1 % 2 != 0) isn *= -1;
@@ -981,7 +982,6 @@ dcomplex ghit(
    double cr = sqrt(four_pi * (l1 + l1mp) * (l1 + l1po) * (l2 + l2 + 1) / l1po);
    result *= (cr * uim);
  }
  delete[] rac3j;
  return result;
}

@@ -2361,6 +2361,7 @@ void ztm(dcomplex **am, ParticleDescriptor *c1) {
  // dcomplex gie, gle, a1, a2, a3, a4, sum1, sum2, sum3, sum4;
  const dcomplex cc0 = 0.0 + 0.0 * I;
  // int i2 = 0; // old implementation
  double *rac3j = new double[c1->lmtpo];
#ifdef USE_NVTX
  nvtxRangePush("ZTM starts");
#endif
@@ -2420,11 +2421,12 @@ void ztm(dcomplex **am, ParticleDescriptor *c1) {
	// gis_v[vecindex] = ghit_d(2, 0, n2, l2, m2, l3, m3, c1, rac3j_local);
	// gls_v[vecindex] = ghit_d(2, 1, n2, l2, m2, l3, m3, c1, rac3j_local);
	// free(rac3j_local);
	gis_v[vecindex] = ghit(2, 0, n2, l2, m2, l3, m3, c1);
	gls_v[vecindex] = ghit(2, 1, n2, l2, m2, l3, m3, c1);
	gis_v[vecindex] = ghit(2, 0, n2, l2, m2, l3, m3, c1, rac3j);
	gls_v[vecindex] = ghit(2, 1, n2, l2, m2, l3, m3, c1, rac3j);
      } // close k3 loop, former l3 + im3 loops
    } // close k2 loop, former l2 + im2 loops
  } // close n2 loop
  delete[] rac3j;
#ifdef USE_NVTX
  nvtxRangePop();
#endif