Loading src/libnptm/clu_subs.cpp +44 −51 Original line number Diff line number Diff line Loading @@ -53,33 +53,26 @@ void apc( double ****zpv, int le, dcomplex **am0m, dcomplex **w, double sqk, double **gapr, dcomplex **gapp ) { dcomplex **ac, **gap; dcomplex *vec_ac, *vec_gap; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimmp, summ, sume, suem, suee, summp, sumep; dcomplex suemp, sueep; // dcomplex uimmp, summ, sume, suem, suee, summp, sumep; // dcomplex suemp, sueep; double cof = 1.0 / sqk; double cimu = cof / sqrt(2.0); int nlem = le * (le + 2); const int nlemt = nlem + nlem; ac = new dcomplex*[nlemt]; gap = new dcomplex*[3]; for (int ai = 0; ai < nlemt; ai++) ac[ai] = new dcomplex[2](); for (int gi = 0; gi < 3; gi++) gap[gi] = new dcomplex[2](); for (int j45 = 1; j45 <= nlemt; j45++) { int j = j45 - 1; ac[j][0] = cc0; ac[j][1] = cc0; for (int i45 = 1; i45 <= nlemt; i45++) { int i = i45 - 1; ac[j][0] += (am0m[j][i] * w[i][0]); ac[j][1] += (am0m[j][i] * w[i][1]); vec_ac = new dcomplex[2 * nlemt](); vec_gap = new dcomplex[6](); #pragma omp parallel for simd for (int j45 = 0; j45 < nlemt; j45++) { for (int i45 = 0; i45 < nlemt; i45++) { vec_ac[2 * j45] += (am0m[j45][i45] * w[i45][0]); vec_ac[2 * j45 + 1] += (am0m[j45][i45] * w[i45][1]); } //i45 loop } //j45 loop for (int imu90 = 1; imu90 <=3; imu90++) { int mu = imu90 - 2; gap[imu90 - 1][0] = cc0; gap[imu90 - 1][1] = cc0; gapp[imu90 - 1][0] = cc0; gapp[imu90 - 1][1] = cc0; for (int l80 = 1; l80 <= le; l80++) { Loading @@ -90,7 +83,7 @@ void apc( if ((l80 == 1 && ilmp == 1) || (l80 == le && ilmp == 3)) continue; // ilmp loop int lmpml = ilmp - 2; int lmp = l80 + lmpml; uimmp = (-1.0 * lmpml) * uim; dcomplex uimmp = (-1.0 * lmpml) * uim; int impmmmp = lmp * (lmp + 1); for (int im70 = 1; im70 <= ltpo; im70++) { int m = im70 - lpo; Loading @@ -104,15 +97,16 @@ void apc( double cgc = cg1(lmpml, mu, l80, m); int jpo = 2; for (int ipo = 1; ipo <= 2; ipo++) { dcomplex summ, sume, suem, suee, summp, sumep, suemp, sueep; if (ipo == 2) jpo = 1; summ = dconjg(ac[i - 1][ipo - 1]) * ac[imp - 1][ipo - 1]; sume = dconjg(ac[i - 1][ipo - 1]) * ac[impe - 1][ipo - 1]; suem = dconjg(ac[ie - 1][ipo - 1]) * ac[imp - 1][ipo - 1]; suee = dconjg(ac[ie - 1][ipo - 1]) * ac[impe - 1][ipo - 1]; summp = dconjg(ac[i - 1][jpo - 1]) * ac[imp - 1][ipo - 1]; sumep = dconjg(ac[i - 1][jpo - 1]) * ac[impe - 1][ipo - 1]; suemp = dconjg(ac[ie - 1][jpo - 1]) * ac[imp - 1][ipo - 1]; sueep = dconjg(ac[ie - 1][jpo - 1]) * ac[impe - 1][ipo - 1]; summ = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sume = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suem = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; suee = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; summp = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sumep = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suemp = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sueep = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; if (lmpml != 0) { summ *= uimmp; sume *= uimmp; Loading @@ -124,7 +118,7 @@ void apc( sueep *= uimmp; } // label 55 gap[imu90 - 1][ipo - 1] += ( vec_gap[2 * (imu90 - 1) + ipo - 1] += ( ( summ * zpv[l80 - 1][ilmp - 1][0][0] + sume * zpv[l80 - 1][ilmp - 1][0][1] Loading @@ -147,9 +141,10 @@ void apc( } // l80 loop } // imu90 loop for (int ipo95 = 1; ipo95 <= 2; ipo95++) { sume = gap[0][ipo95 - 1] * cimu; suee = gap[1][ipo95 - 1] * cof; suem = gap[2][ipo95 - 1] * cimu; dcomplex sume, suee, suem, sumep, sueep, suemp; sume = vec_gap[ipo95 - 1] * cimu; suee = vec_gap[2 + ipo95 - 1] * cof; suem = vec_gap[4 + ipo95 - 1] * cimu; gapr[0][ipo95 - 1] = real(sume - suem); gapr[1][ipo95 - 1] = real((sume + suem) * uim); gapr[2][ipo95 - 1] = real(suee); Loading @@ -161,10 +156,8 @@ void apc( gapp[2][ipo95 - 1] = sueep; } // ipo95 loop // Clean memory for (int ai = nlemt - 1; ai > -1; ai--) delete[] ac[ai]; for (int gi = 2; gi > -1; gi--) delete[] gap[gi]; delete[] ac; delete[] gap; delete[] vec_ac; delete[] vec_gap; } void apcra( Loading Loading
src/libnptm/clu_subs.cpp +44 −51 Original line number Diff line number Diff line Loading @@ -53,33 +53,26 @@ void apc( double ****zpv, int le, dcomplex **am0m, dcomplex **w, double sqk, double **gapr, dcomplex **gapp ) { dcomplex **ac, **gap; dcomplex *vec_ac, *vec_gap; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimmp, summ, sume, suem, suee, summp, sumep; dcomplex suemp, sueep; // dcomplex uimmp, summ, sume, suem, suee, summp, sumep; // dcomplex suemp, sueep; double cof = 1.0 / sqk; double cimu = cof / sqrt(2.0); int nlem = le * (le + 2); const int nlemt = nlem + nlem; ac = new dcomplex*[nlemt]; gap = new dcomplex*[3]; for (int ai = 0; ai < nlemt; ai++) ac[ai] = new dcomplex[2](); for (int gi = 0; gi < 3; gi++) gap[gi] = new dcomplex[2](); for (int j45 = 1; j45 <= nlemt; j45++) { int j = j45 - 1; ac[j][0] = cc0; ac[j][1] = cc0; for (int i45 = 1; i45 <= nlemt; i45++) { int i = i45 - 1; ac[j][0] += (am0m[j][i] * w[i][0]); ac[j][1] += (am0m[j][i] * w[i][1]); vec_ac = new dcomplex[2 * nlemt](); vec_gap = new dcomplex[6](); #pragma omp parallel for simd for (int j45 = 0; j45 < nlemt; j45++) { for (int i45 = 0; i45 < nlemt; i45++) { vec_ac[2 * j45] += (am0m[j45][i45] * w[i45][0]); vec_ac[2 * j45 + 1] += (am0m[j45][i45] * w[i45][1]); } //i45 loop } //j45 loop for (int imu90 = 1; imu90 <=3; imu90++) { int mu = imu90 - 2; gap[imu90 - 1][0] = cc0; gap[imu90 - 1][1] = cc0; gapp[imu90 - 1][0] = cc0; gapp[imu90 - 1][1] = cc0; for (int l80 = 1; l80 <= le; l80++) { Loading @@ -90,7 +83,7 @@ void apc( if ((l80 == 1 && ilmp == 1) || (l80 == le && ilmp == 3)) continue; // ilmp loop int lmpml = ilmp - 2; int lmp = l80 + lmpml; uimmp = (-1.0 * lmpml) * uim; dcomplex uimmp = (-1.0 * lmpml) * uim; int impmmmp = lmp * (lmp + 1); for (int im70 = 1; im70 <= ltpo; im70++) { int m = im70 - lpo; Loading @@ -104,15 +97,16 @@ void apc( double cgc = cg1(lmpml, mu, l80, m); int jpo = 2; for (int ipo = 1; ipo <= 2; ipo++) { dcomplex summ, sume, suem, suee, summp, sumep, suemp, sueep; if (ipo == 2) jpo = 1; summ = dconjg(ac[i - 1][ipo - 1]) * ac[imp - 1][ipo - 1]; sume = dconjg(ac[i - 1][ipo - 1]) * ac[impe - 1][ipo - 1]; suem = dconjg(ac[ie - 1][ipo - 1]) * ac[imp - 1][ipo - 1]; suee = dconjg(ac[ie - 1][ipo - 1]) * ac[impe - 1][ipo - 1]; summp = dconjg(ac[i - 1][jpo - 1]) * ac[imp - 1][ipo - 1]; sumep = dconjg(ac[i - 1][jpo - 1]) * ac[impe - 1][ipo - 1]; suemp = dconjg(ac[ie - 1][jpo - 1]) * ac[imp - 1][ipo - 1]; sueep = dconjg(ac[ie - 1][jpo - 1]) * ac[impe - 1][ipo - 1]; summ = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sume = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suem = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; suee = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; summp = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sumep = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suemp = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sueep = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; if (lmpml != 0) { summ *= uimmp; sume *= uimmp; Loading @@ -124,7 +118,7 @@ void apc( sueep *= uimmp; } // label 55 gap[imu90 - 1][ipo - 1] += ( vec_gap[2 * (imu90 - 1) + ipo - 1] += ( ( summ * zpv[l80 - 1][ilmp - 1][0][0] + sume * zpv[l80 - 1][ilmp - 1][0][1] Loading @@ -147,9 +141,10 @@ void apc( } // l80 loop } // imu90 loop for (int ipo95 = 1; ipo95 <= 2; ipo95++) { sume = gap[0][ipo95 - 1] * cimu; suee = gap[1][ipo95 - 1] * cof; suem = gap[2][ipo95 - 1] * cimu; dcomplex sume, suee, suem, sumep, sueep, suemp; sume = vec_gap[ipo95 - 1] * cimu; suee = vec_gap[2 + ipo95 - 1] * cof; suem = vec_gap[4 + ipo95 - 1] * cimu; gapr[0][ipo95 - 1] = real(sume - suem); gapr[1][ipo95 - 1] = real((sume + suem) * uim); gapr[2][ipo95 - 1] = real(suee); Loading @@ -161,10 +156,8 @@ void apc( gapp[2][ipo95 - 1] = sueep; } // ipo95 loop // Clean memory for (int ai = nlemt - 1; ai > -1; ai--) delete[] ac[ai]; for (int gi = 2; gi > -1; gi--) delete[] gap[gi]; delete[] ac; delete[] gap; delete[] vec_ac; delete[] vec_gap; } void apcra( Loading