Commit 19e36857 authored by Mulas, Giacomo
Browse files

fix omp parallel loop in scr0()

parent e5339261
Loading
Loading
Loading
Loading
+25 −9
Original line number Diff line number Diff line
@@ -2146,24 +2146,19 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) {
  double cccs = ccs / exdc;
  dcomplex csam = -(ccs / (exri * vk)) * 0.5 * I;
  //double scs = 0.0, ecs = 0.0, acs = 0.0;
  double scs = 0.0;
  double ecs = 0.0;
  double acs = 0.0;
  dcomplex tfsas = cc0;
  dcomplex *vec_rmi = c1->rmi[0];
  dcomplex *vec_rei = c1->rei[0];
#ifdef USE_NVTX
  nvtxRangePush("scr0 outer loop");
  nvtxRangePush("scr0 outer loop 1");
#endif

  //#pragma omp parallel for reduction(+:scs, ecs, acs, tfsas)
#pragma omp parallel for
  for (int i14 = 1; i14 <= c4->nsph; i14++) {
    int iogi = c1->iog[i14 - 1];
    if (iogi >= i14) {
      double sums = 0.0;
      dcomplex sum21 = cc0;
#ifdef USE_NVTX
      nvtxRangePush("scr0 inner loop");
      nvtxRangePush("scr0 inner loop 1");
#endif
#pragma omp target teams distribute parallel for simd reduction(+:sums, sum21)
      for (int l10 = 1; l10 <= c4->li; l10++) {
@@ -2177,6 +2172,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) {
	sums += rvalue;
	sum21 += ((rm + re) * fl);
      } // l10 loop
#ifdef USE_NVTX
      nvtxRangePop();
#endif
      sum21 *= -1.0;
      double scasec = cccs * sums;
      double extsec = -cccs * real(sum21);
@@ -2191,11 +2189,29 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) {
      c1->fsas[i14 - 1] = sum21 * csam;
    }
    // label 12
    // scs += c1->sscs[iogi - 1];
    // ecs += c1->sexs[iogi - 1];
    // acs += c1->sabs[iogi - 1];
    // tfsas += c1->fsas[iogi - 1];
  } // i14 loop
#ifdef USE_NVTX
  nvtxRangePop();
#endif
  double scs = 0.0;
  double ecs = 0.0;
  double acs = 0.0;
  dcomplex tfsas = cc0;
#ifdef USE_NVTX
      nvtxRangePush("scr0 loop 2");
#endif
#pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas)
  for (int i14 = 1; i14 <= c4->nsph; i14++) {
    int iogi = c1->iog[i14 - 1];
    scs += c1->sscs[iogi - 1];
    ecs += c1->sexs[iogi - 1];
    acs += c1->sabs[iogi - 1];
    tfsas += c1->fsas[iogi - 1];
  } // i14 loop
  }
  c3->scs = scs;
  c3->ecs = ecs;
  c3->acs = acs;