Loading src/libnptm/clu_subs.cpp +21 −9 Original line number Diff line number Diff line Loading @@ -2110,17 +2110,21 @@ void scr2( cph = uim * exri * vkarg; int ls = (c4->li < c4->le) ? c4->li : c4->le; int kmax = (ls+1)*(ls+1)-1; c3->tsas[0][0] = cc0; c3->tsas[1][0] = cc0; c3->tsas[0][1] = cc0; c3->tsas[1][1] = cc0; dcomplex tsas00 = cc0; dcomplex tsas10 = cc0; dcomplex tsas01 = cc0; dcomplex tsas11 = cc0; // c3->tsas[0][0] = cc0; // c3->tsas[1][0] = cc0; // c3->tsas[0][1] = cc0; // c3->tsas[1][1] = cc0; dcomplex *vec_rmi = c1->rmi[0]; dcomplex *vec_rei = c1->rei[0]; dcomplex *vec_w = c1->w[0]; #ifdef USE_NVTX nvtxRangePush("scr2 outer loop 1"); #endif //#pragma omp parallel for #pragma omp parallel for reduction(+:tsas00, tsas10, tsas01, tsas11) for (int i14 = 1; i14 <= c4->nsph; i14++) { int i = i14 - 1; int iogi = c1->iog[i14 - 1]; Loading Loading @@ -2191,14 +2195,22 @@ void scr2( } // label 12 dcomplex phas = cexp(cph * (duk[0] * c1->rxx[i] + duk[1] * c1->ryy[i] + duk[2] * c1->rzz[i])); c3->tsas[0][0] += (c1->sas[iogi - 1][0][0] * phas); c3->tsas[1][0] += (c1->sas[iogi - 1][1][0] * phas); c3->tsas[0][1] += (c1->sas[iogi - 1][0][1] * phas); c3->tsas[1][1] += (c1->sas[iogi - 1][1][1] * phas); tsas00 += (c1->sas[iogi - 1][0][0] * phas); tsas10 += (c1->sas[iogi - 1][1][0] * phas); tsas01 += (c1->sas[iogi - 1][0][1] * phas); tsas11 += (c1->sas[iogi - 1][1][1] * phas); // c3->tsas[0][0] += (c1->sas[iogi - 1][0][0] * phas); // c3->tsas[1][0] += (c1->sas[iogi - 1][1][0] * phas); // c3->tsas[0][1] += (c1->sas[iogi - 1][0][1] * phas); // c3->tsas[1][1] += (c1->sas[iogi - 1][1][1] * phas); } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif c3->tsas[0][0] = tsas00; c3->tsas[1][0] = tsas10; c3->tsas[0][1] = tsas01; c3->tsas[1][1] = tsas11; dcomplex *vec_vints = c1->vints[0]; #ifdef USE_NVTX nvtxRangePush("scr2 outer loop 2"); Loading Loading
src/libnptm/clu_subs.cpp +21 −9 Original line number Diff line number Diff line Loading @@ -2110,17 +2110,21 @@ void scr2( cph = uim * exri * vkarg; int ls = (c4->li < c4->le) ? c4->li : c4->le; int kmax = (ls+1)*(ls+1)-1; c3->tsas[0][0] = cc0; c3->tsas[1][0] = cc0; c3->tsas[0][1] = cc0; c3->tsas[1][1] = cc0; dcomplex tsas00 = cc0; dcomplex tsas10 = cc0; dcomplex tsas01 = cc0; dcomplex tsas11 = cc0; // c3->tsas[0][0] = cc0; // c3->tsas[1][0] = cc0; // c3->tsas[0][1] = cc0; // c3->tsas[1][1] = cc0; dcomplex *vec_rmi = c1->rmi[0]; dcomplex *vec_rei = c1->rei[0]; dcomplex *vec_w = c1->w[0]; #ifdef USE_NVTX nvtxRangePush("scr2 outer loop 1"); #endif //#pragma omp parallel for #pragma omp parallel for reduction(+:tsas00, tsas10, tsas01, tsas11) for (int i14 = 1; i14 <= c4->nsph; i14++) { int i = i14 - 1; int iogi = c1->iog[i14 - 1]; Loading Loading @@ -2191,14 +2195,22 @@ void scr2( } // label 12 dcomplex phas = cexp(cph * (duk[0] * c1->rxx[i] + duk[1] * c1->ryy[i] + duk[2] * c1->rzz[i])); c3->tsas[0][0] += (c1->sas[iogi - 1][0][0] * phas); c3->tsas[1][0] += (c1->sas[iogi - 1][1][0] * phas); c3->tsas[0][1] += (c1->sas[iogi - 1][0][1] * phas); c3->tsas[1][1] += (c1->sas[iogi - 1][1][1] * phas); tsas00 += (c1->sas[iogi - 1][0][0] * phas); tsas10 += (c1->sas[iogi - 1][1][0] * phas); tsas01 += (c1->sas[iogi - 1][0][1] * phas); tsas11 += (c1->sas[iogi - 1][1][1] * phas); // c3->tsas[0][0] += (c1->sas[iogi - 1][0][0] * phas); // c3->tsas[1][0] += (c1->sas[iogi - 1][1][0] * phas); // c3->tsas[0][1] += (c1->sas[iogi - 1][0][1] * phas); // c3->tsas[1][1] += (c1->sas[iogi - 1][1][1] * phas); } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif c3->tsas[0][0] = tsas00; c3->tsas[1][0] = tsas10; c3->tsas[0][1] = tsas01; c3->tsas[1][1] = tsas11; dcomplex *vec_vints = c1->vints[0]; #ifdef USE_NVTX nvtxRangePush("scr2 outer loop 2"); Loading