Loading src/trapping/cfrfme.cpp +8 −22 Original line number Diff line number Diff line Loading @@ -514,9 +514,8 @@ void frfme(string data_file, string output_path) { double z = _zv[iz75] + frsh; double y = _yv[iy70]; double x = _xv[ix65]; double rsumy = 0.0; double isumy = 0.0; #pragma omp parallel for simd reduction(+:rsumy, isumy) dcomplex sumy = cc0; #pragma omp parallel for simd reduction(+:sumy) for (int jy60x55 = 0; jy60x55 < nkvs ; jy60x55++) { int jy60 = jy60x55 / nkv; int jx55 = jy60x55 % nkv; Loading @@ -526,44 +525,31 @@ void frfme(string data_file, string output_path) { // jx55 = 0: phasf double vkx = vkv[nkv - 1]; double vkz = vec_vkzm[jy60]; double angle = -vkx * x + vky * y + vkz * z; double s, c; sincos(angle, &s, &c); dcomplex phasf = c + uim * s; dcomplex phasf = cexp(uim * (-vkx * x + vky * y + vkz * z)); dcomplex term = vec_w[jy60] * phasf * 0.5; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; term *= factor; rsumy += (real(term)); isumy += (imag(term)); sumy += term; } else if (jx55 == nkv - 1) { // jx55 = nkv - 1: phasl double vkx = vkv[nkv - 1]; double vkz = vec_vkzm[(nkv - 1) * nkv + jy60]; double angle = vkx * x + vky * y + vkz * z; double s, c; sincos(angle, &s, &c); dcomplex phasl = c + uim * s; dcomplex phasl = cexp(uim * (vkx * x + vky * y + vkz * z)); dcomplex term = vec_w[(nkv - 1) * nkv + jy60] * phasl * 0.5; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; term *= factor; rsumy += (real(term)); isumy += (imag(term)); sumy += term; } else { // 1 <= jx55 < nkv - 1 double vkx = vkv[jx55]; double vkz = vec_vkzm[(jx55) * nkv + jy60]; double angle = vkx * x + vky * y + vkz * z; double s, c; sincos(angle, &s, &c); dcomplex phas = c + uim * s; dcomplex phas = cexp(uim * (vkx * x + vky * y + vkz * z)); dcomplex term = vec_w[(jx55) * nkv + jy60] * phas; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; term *= factor; rsumy += (real(term)); isumy += (imag(term)); sumy += term; } } // jy60x55 loop dcomplex sumy = rsumy + uim * isumy; vec_wsum[(j80 * nrvc) + ixyz] = sumy * delks; } // ixyz loop } // j80 loop Loading Loading
src/trapping/cfrfme.cpp +8 −22 Original line number Diff line number Diff line Loading @@ -514,9 +514,8 @@ void frfme(string data_file, string output_path) { double z = _zv[iz75] + frsh; double y = _yv[iy70]; double x = _xv[ix65]; double rsumy = 0.0; double isumy = 0.0; #pragma omp parallel for simd reduction(+:rsumy, isumy) dcomplex sumy = cc0; #pragma omp parallel for simd reduction(+:sumy) for (int jy60x55 = 0; jy60x55 < nkvs ; jy60x55++) { int jy60 = jy60x55 / nkv; int jx55 = jy60x55 % nkv; Loading @@ -526,44 +525,31 @@ void frfme(string data_file, string output_path) { // jx55 = 0: phasf double vkx = vkv[nkv - 1]; double vkz = vec_vkzm[jy60]; double angle = -vkx * x + vky * y + vkz * z; double s, c; sincos(angle, &s, &c); dcomplex phasf = c + uim * s; dcomplex phasf = cexp(uim * (-vkx * x + vky * y + vkz * z)); dcomplex term = vec_w[jy60] * phasf * 0.5; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; term *= factor; rsumy += (real(term)); isumy += (imag(term)); sumy += term; } else if (jx55 == nkv - 1) { // jx55 = nkv - 1: phasl double vkx = vkv[nkv - 1]; double vkz = vec_vkzm[(nkv - 1) * nkv + jy60]; double angle = vkx * x + vky * y + vkz * z; double s, c; sincos(angle, &s, &c); dcomplex phasl = c + uim * s; dcomplex phasl = cexp(uim * (vkx * x + vky * y + vkz * z)); dcomplex term = vec_w[(nkv - 1) * nkv + jy60] * phasl * 0.5; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; term *= factor; rsumy += (real(term)); isumy += (imag(term)); sumy += term; } else { // 1 <= jx55 < nkv - 1 double vkx = vkv[jx55]; double vkz = vec_vkzm[(jx55) * nkv + jy60]; double angle = vkx * x + vky * y + vkz * z; double s, c; sincos(angle, &s, &c); dcomplex phas = c + uim * s; dcomplex phas = cexp(uim * (vkx * x + vky * y + vkz * z)); dcomplex term = vec_w[(jx55) * nkv + jy60] * phas; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; term *= factor; rsumy += (real(term)); isumy += (imag(term)); sumy += term; } } // jy60x55 loop dcomplex sumy = rsumy + uim * isumy; vec_wsum[(j80 * nrvc) + ixyz] = sumy * delks; } // ixyz loop } // j80 loop Loading