Loading src/libnptm/clu_subs.cpp +9 −23 Original line number Diff line number Diff line Loading @@ -1312,7 +1312,7 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) { nvtxRangePush("whole pcros"); #endif const dcomplex cc0 = 0.0 + 0.0 * I; dcomplex sump, sum1, sum2, sum3, sum4, am, amp, cc, csam; dcomplex sump, sum1, sum2, sum3, sum4, cc, csam; const double exdc = exri * exri; double ccs = 1.0 / (vk * vk); double cccs = ccs / exdc; Loading Loading @@ -1373,7 +1373,6 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) { c1->sac[jpo][ipo18] = csam * sum4; } // ipo18 loop int i = 0; dcomplex * &vint = c1->vint; #ifdef USE_NVTX nvtxRangePush("pcros loop 2"); #endif Loading Loading @@ -2023,8 +2022,6 @@ void raba( } //i20 loop #ifdef USE_NVTX nvtxRangePop(); #endif #ifdef USE_NVTX nvtxRangePush("raba outer loop 2"); #endif #ifdef USE_TARGET_OFFLOAD Loading Loading @@ -2246,15 +2243,11 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) { c1->sqabs[i14 - 1] = abssec / gcss; c1->fsas[i14 - 1] = sum21 * csam; } // label 12 // scs += c1->sscs[iogi - 1]; // ecs += c1->sexs[iogi - 1]; // acs += c1->sabs[iogi - 1]; // tfsas += c1->fsas[iogi - 1]; } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif // label 12 double scs = 0.0; double ecs = 0.0; double acs = 0.0; Loading Loading @@ -2367,16 +2360,11 @@ void scr2( vec_sas[vecindex+1] = s12 * csam; vec_sas[vecindex+3] = s22 * csam; } // label 12 // dcomplex phas = cexp(cph * (duk[0] * c1->rxx[i] + duk[1] * c1->ryy[i] + duk[2] * c1->rzz[i])); // tsas00 += (c1->sas[iogi - 1][0][0] * phas); // tsas10 += (c1->sas[iogi - 1][1][0] * phas); // tsas01 += (c1->sas[iogi - 1][0][1] * phas); // tsas11 += (c1->sas[iogi - 1][1][1] * phas); } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif // label 12 dcomplex tsas00 = cc0; dcomplex tsas10 = cc0; dcomplex tsas01 = cc0; Loading Loading @@ -2405,9 +2393,7 @@ void scr2( c1->tsas[1][1] = tsas11; #ifdef USE_NVTX nvtxRangePop(); //#endif //dcomplex *vec_vints = c1->vints[0]; //#ifdef USE_NVTX nvtxRangePush("scr2 outer loop 3"); #endif #pragma omp parallel for Loading Loading
src/libnptm/clu_subs.cpp +9 −23 Original line number Diff line number Diff line Loading @@ -1312,7 +1312,7 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) { nvtxRangePush("whole pcros"); #endif const dcomplex cc0 = 0.0 + 0.0 * I; dcomplex sump, sum1, sum2, sum3, sum4, am, amp, cc, csam; dcomplex sump, sum1, sum2, sum3, sum4, cc, csam; const double exdc = exri * exri; double ccs = 1.0 / (vk * vk); double cccs = ccs / exdc; Loading Loading @@ -1373,7 +1373,6 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) { c1->sac[jpo][ipo18] = csam * sum4; } // ipo18 loop int i = 0; dcomplex * &vint = c1->vint; #ifdef USE_NVTX nvtxRangePush("pcros loop 2"); #endif Loading Loading @@ -2023,8 +2022,6 @@ void raba( } //i20 loop #ifdef USE_NVTX nvtxRangePop(); #endif #ifdef USE_NVTX nvtxRangePush("raba outer loop 2"); #endif #ifdef USE_TARGET_OFFLOAD Loading Loading @@ -2246,15 +2243,11 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) { c1->sqabs[i14 - 1] = abssec / gcss; c1->fsas[i14 - 1] = sum21 * csam; } // label 12 // scs += c1->sscs[iogi - 1]; // ecs += c1->sexs[iogi - 1]; // acs += c1->sabs[iogi - 1]; // tfsas += c1->fsas[iogi - 1]; } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif // label 12 double scs = 0.0; double ecs = 0.0; double acs = 0.0; Loading Loading @@ -2367,16 +2360,11 @@ void scr2( vec_sas[vecindex+1] = s12 * csam; vec_sas[vecindex+3] = s22 * csam; } // label 12 // dcomplex phas = cexp(cph * (duk[0] * c1->rxx[i] + duk[1] * c1->ryy[i] + duk[2] * c1->rzz[i])); // tsas00 += (c1->sas[iogi - 1][0][0] * phas); // tsas10 += (c1->sas[iogi - 1][1][0] * phas); // tsas01 += (c1->sas[iogi - 1][0][1] * phas); // tsas11 += (c1->sas[iogi - 1][1][1] * phas); } // i14 loop #ifdef USE_NVTX nvtxRangePop(); #endif // label 12 dcomplex tsas00 = cc0; dcomplex tsas10 = cc0; dcomplex tsas01 = cc0; Loading Loading @@ -2405,9 +2393,7 @@ void scr2( c1->tsas[1][1] = tsas11; #ifdef USE_NVTX nvtxRangePop(); //#endif //dcomplex *vec_vints = c1->vints[0]; //#ifdef USE_NVTX nvtxRangePush("scr2 outer loop 3"); #endif #pragma omp parallel for Loading