Loading src/libnptm/clu_subs.cpp +25 −9 Original line number Diff line number Diff line Loading @@ -2146,24 +2146,19 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { double cccs = ccs / exdc; dcomplex csam = -(ccs / (exri * vk)) * 0.5 * I; //double scs = 0.0, ecs = 0.0, acs = 0.0; double scs = 0.0; double ecs = 0.0; double acs = 0.0; dcomplex tfsas = cc0; dcomplex *vec_rmi = c1->rmi[0]; dcomplex *vec_rei = c1->rei[0]; #ifdef USE_NVTX nvtxRangePush("scr0 outer loop"); nvtxRangePush("scr0 outer loop 1"); #endif //#pragma omp parallel for reduction(+:scs, ecs, acs, tfsas) #pragma omp parallel for for (int i14 = 1; i14 <= c4->nsph; i14++) { int iogi = c1->iog[i14 - 1]; if (iogi >= i14) { double sums = 0.0; dcomplex sum21 = cc0; #ifdef USE_NVTX nvtxRangePush("scr0 inner loop"); nvtxRangePush("scr0 inner loop 1"); #endif #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21) for (int l10 = 1; l10 <= c4->li; l10++) { Loading @@ -2177,6 +2172,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { sums += rvalue; sum21 += ((rm + re) * fl); } // l10 loop #ifdef USE_NVTX nvtxRangePop(); #endif sum21 *= -1.0; double scasec = cccs * sums; double extsec = -cccs * real(sum21); Loading @@ -2191,11 +2189,29 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { c1->fsas[i14 - 1] = sum21 * csam; } // label 12 // scs += c1->sscs[iogi - 1]; // ecs += c1->sexs[iogi - 1]; // acs += c1->sabs[iogi - 1]; // tfsas += c1->fsas[iogi - 1]; } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif double scs = 0.0; double ecs = 0.0; double acs = 0.0; dcomplex tfsas = cc0; #ifdef USE_NVTX nvtxRangePush("scr0 loop 2"); #endif #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas) for (int i14 = 1; i14 <= c4->nsph; i14++) { int iogi = c1->iog[i14 - 1]; scs += c1->sscs[iogi - 1]; ecs += c1->sexs[iogi - 1]; acs += c1->sabs[iogi - 1]; tfsas += c1->fsas[iogi - 1]; } // i14 loop } c3->scs = scs; c3->ecs = ecs; c3->acs = acs; Loading Loading
src/libnptm/clu_subs.cpp +25 −9 Original line number Diff line number Diff line Loading @@ -2146,24 +2146,19 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { double cccs = ccs / exdc; dcomplex csam = -(ccs / (exri * vk)) * 0.5 * I; //double scs = 0.0, ecs = 0.0, acs = 0.0; double scs = 0.0; double ecs = 0.0; double acs = 0.0; dcomplex tfsas = cc0; dcomplex *vec_rmi = c1->rmi[0]; dcomplex *vec_rei = c1->rei[0]; #ifdef USE_NVTX nvtxRangePush("scr0 outer loop"); nvtxRangePush("scr0 outer loop 1"); #endif //#pragma omp parallel for reduction(+:scs, ecs, acs, tfsas) #pragma omp parallel for for (int i14 = 1; i14 <= c4->nsph; i14++) { int iogi = c1->iog[i14 - 1]; if (iogi >= i14) { double sums = 0.0; dcomplex sum21 = cc0; #ifdef USE_NVTX nvtxRangePush("scr0 inner loop"); nvtxRangePush("scr0 inner loop 1"); #endif #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21) for (int l10 = 1; l10 <= c4->li; l10++) { Loading @@ -2177,6 +2172,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { sums += rvalue; sum21 += ((rm + re) * fl); } // l10 loop #ifdef USE_NVTX nvtxRangePop(); #endif sum21 *= -1.0; double scasec = cccs * sums; double extsec = -cccs * real(sum21); Loading @@ -2191,11 +2189,29 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { c1->fsas[i14 - 1] = sum21 * csam; } // label 12 // scs += c1->sscs[iogi - 1]; // ecs += c1->sexs[iogi - 1]; // acs += c1->sabs[iogi - 1]; // tfsas += c1->fsas[iogi - 1]; } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif double scs = 0.0; double ecs = 0.0; double acs = 0.0; dcomplex tfsas = cc0; #ifdef USE_NVTX nvtxRangePush("scr0 loop 2"); #endif #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas) for (int i14 = 1; i14 <= c4->nsph; i14++) { int iogi = c1->iog[i14 - 1]; scs += c1->sscs[iogi - 1]; ecs += c1->sexs[iogi - 1]; acs += c1->sabs[iogi - 1]; tfsas += c1->fsas[iogi - 1]; } // i14 loop } c3->scs = scs; c3->ecs = ecs; c3->acs = acs; Loading