Loading src/libnptm/clu_subs.cpp +173 −170 Original line number Diff line number Diff line Loading @@ -165,17 +165,23 @@ void apcra( ) { const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; dcomplex a11, a12, a21, a22, fc; // dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; // dcomplex a11, a12, a21, a22, fc; double *vec_svw = new double[le * 12](); dcomplex *vec_svs = new dcomplex[le * 12](); dcomplex sum1 = cc0; dcomplex sum2 = cc0; int nlem = le * (le + 2); #pragma omp parallel { #pragma omp for for (int l28 = 1; l28 <= le; l28++) { int lpo = l28 + 1; int ltpo = lpo + l28; double fl = sqrt(1.0 * ltpo); for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l28 == 1 && ilmp == 1) || (l28 == le && ilmp == 3)) continue; // ilmp loop const int svw_start = 12 * (l28 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l28 + lmpml; double flmp = sqrt(1.0 * (lmp + lmp + 1)); Loading @@ -185,19 +191,19 @@ void apcra( if (inpol == 0) { double cgs = cgmpo + cgmmo; double cgd = cgmpo - cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgs; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgs; vec_svw[svw_start] = cgs; vec_svw[svw_start + 1] = cgd; vec_svw[svw_start + 2] = cgd; vec_svw[svw_start + 3] = cgs; } else { // label 22 vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = -cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgmmo; vec_svw[svw_start] = cgmpo; vec_svw[svw_start + 2] = cgmpo; vec_svw[svw_start + 1] = -cgmmo; vec_svw[svw_start + 3] = cgmmo; } // label 26 } // ilmp loop } // l28 loop } // l28 parallel loop // for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted // for (int ilmp = 1; ilmp <= 3; ilmp++) { // for (int ipa = 1; ipa <= 2; ipa++) { Loading @@ -207,16 +213,18 @@ void apcra( // } // ipa loop // } // ilmp loop // } // l30 loop #pragma omp for reduction(+: vec_svs[0:12*le]) for (int l58 = 1; l58 <= le; l58 ++) { int lpo = l58 + 1; int ltpo = l58 + lpo; int imm = l58 * lpo; for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l58 == 1 && ilmp == 1) || (l58 == le && ilmp == 3)) continue; // ilmp loop const int svs_start = 12 * (l58 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l58 + lmpml; int impmm = lmp * (lmp + 1); uimtl = uim * (1.0 * lmpml); dcomplex uimtl = uim * (1.0 * lmpml); if (lmpml == 0) uimtl = 1.0 + 0.0 * I; for (int im54 = 1; im54 <= ltpo; im54++) { int m = im54 - lpo; Loading @@ -239,7 +247,7 @@ void apcra( int lsmpml = ilsmp - 2; int lsmp = ls + lsmpml; int ismpmm = lsmp * (lsmp + 1); uimtls = -uim * (1.0 * lsmpml); dcomplex uimtls = -uim * (1.0 * lsmpml); if (lsmpml == 0) uimtls = 1.0 + 0.0 * I; for (int ims = 1; ims <= lstpo; ims++) { int ms = ims - lspo; Loading @@ -251,37 +259,43 @@ void apcra( int ismp = ismpmm + msmp; int ismpe = ismp + nlem; double cgcs = cg1(lsmpml, mu, ls, ms); fc = (uimtl * uimtls) * (cgc * cgcs); ca11 = dconjg(am0m[is - 1][i - 1]); ca12 = dconjg(am0m[is - 1][ie - 1]); ca21 = dconjg(am0m[ise - 1][i - 1]); ca22 = dconjg(am0m[ise - 1][ie - 1]); a11 = am0m[ismp - 1][imp - 1]; a12 = am0m[ismp - 1][impe - 1]; a21 = am0m[ismpe - 1][imp - 1]; a22 = am0m[ismpe - 1][impe - 1]; dcomplex fc = (uimtl * uimtls) * (cgc * cgcs); dcomplex ca11 = dconjg(am0m[is - 1][i - 1]); dcomplex ca12 = dconjg(am0m[is - 1][ie - 1]); dcomplex ca21 = dconjg(am0m[ise - 1][i - 1]); dcomplex ca22 = dconjg(am0m[ise - 1][ie - 1]); dcomplex a11 = am0m[ismp - 1][imp - 1]; dcomplex a12 = am0m[ismp - 1][impe - 1]; dcomplex a21 = am0m[ismpe - 1][imp - 1]; dcomplex a22 = am0m[ismpe - 1][impe - 1]; double z11 = zpv[ls - 1][ilsmp - 1][0][0]; double z12 = zpv[ls - 1][ilsmp - 1][0][1]; double z21 = zpv[ls - 1][ilsmp - 1][1][0]; double z22 = zpv[ls - 1][ilsmp - 1][1][1]; vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1)] += ( vec_svs[svs_start] += ( ( ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22 ) * fc ); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 1] += ((ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22) * fc); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 2] += ((ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22) * fc); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 3] += ((ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22) * fc); vec_svs[svs_start + 1] += ( ( ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22 ) * fc ); vec_svs[svs_start + 2] += ( ( ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22 ) * fc ); vec_svs[svs_start + 3] += ( ( ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22 ) * fc ); } // ends ims loop } // ims loop } // ilsmp loop Loading @@ -291,8 +305,7 @@ void apcra( } // im54 loop } // ilmp loop } // l58 loop dcomplex sum1 = cc0; dcomplex sum2 = cc0; #pragma omp for reduction(+: sum1, sum2) for (int l68 = 1; l68 <= le; l68++) { //int lpo = l68 + 1; //int ltpo = l68 + lpo; Loading Loading @@ -328,6 +341,7 @@ void apcra( } // label 66, ends ilmp loop } // ilmp loop } // l68 loop } // OMP parallel region const double half_pi = acos(0.0); double cofs = half_pi * 2.0 / sqk; gaprm[0][0] = 0.0; Loading @@ -352,18 +366,7 @@ void apcra( gappm[2][0] = cc0; gappm[2][1] = cc0; } // Clean memory // for (int i = le - 1; i > -1; i--) { // for (int j = 2; j > -1; j--) { // delete[] svw[i][j]; // delete[] svs[i][j]; // } // delete[] svw[i]; // delete[] svs[i]; // } // delete[] svw; // delete[] svs; delete[] vec_svw; delete[] vec_svs; } Loading Loading
src/libnptm/clu_subs.cpp +173 −170 Original line number Diff line number Diff line Loading @@ -165,17 +165,23 @@ void apcra( ) { const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; dcomplex a11, a12, a21, a22, fc; // dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; // dcomplex a11, a12, a21, a22, fc; double *vec_svw = new double[le * 12](); dcomplex *vec_svs = new dcomplex[le * 12](); dcomplex sum1 = cc0; dcomplex sum2 = cc0; int nlem = le * (le + 2); #pragma omp parallel { #pragma omp for for (int l28 = 1; l28 <= le; l28++) { int lpo = l28 + 1; int ltpo = lpo + l28; double fl = sqrt(1.0 * ltpo); for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l28 == 1 && ilmp == 1) || (l28 == le && ilmp == 3)) continue; // ilmp loop const int svw_start = 12 * (l28 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l28 + lmpml; double flmp = sqrt(1.0 * (lmp + lmp + 1)); Loading @@ -185,19 +191,19 @@ void apcra( if (inpol == 0) { double cgs = cgmpo + cgmmo; double cgd = cgmpo - cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgs; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgs; vec_svw[svw_start] = cgs; vec_svw[svw_start + 1] = cgd; vec_svw[svw_start + 2] = cgd; vec_svw[svw_start + 3] = cgs; } else { // label 22 vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = -cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgmmo; vec_svw[svw_start] = cgmpo; vec_svw[svw_start + 2] = cgmpo; vec_svw[svw_start + 1] = -cgmmo; vec_svw[svw_start + 3] = cgmmo; } // label 26 } // ilmp loop } // l28 loop } // l28 parallel loop // for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted // for (int ilmp = 1; ilmp <= 3; ilmp++) { // for (int ipa = 1; ipa <= 2; ipa++) { Loading @@ -207,16 +213,18 @@ void apcra( // } // ipa loop // } // ilmp loop // } // l30 loop #pragma omp for reduction(+: vec_svs[0:12*le]) for (int l58 = 1; l58 <= le; l58 ++) { int lpo = l58 + 1; int ltpo = l58 + lpo; int imm = l58 * lpo; for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l58 == 1 && ilmp == 1) || (l58 == le && ilmp == 3)) continue; // ilmp loop const int svs_start = 12 * (l58 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l58 + lmpml; int impmm = lmp * (lmp + 1); uimtl = uim * (1.0 * lmpml); dcomplex uimtl = uim * (1.0 * lmpml); if (lmpml == 0) uimtl = 1.0 + 0.0 * I; for (int im54 = 1; im54 <= ltpo; im54++) { int m = im54 - lpo; Loading @@ -239,7 +247,7 @@ void apcra( int lsmpml = ilsmp - 2; int lsmp = ls + lsmpml; int ismpmm = lsmp * (lsmp + 1); uimtls = -uim * (1.0 * lsmpml); dcomplex uimtls = -uim * (1.0 * lsmpml); if (lsmpml == 0) uimtls = 1.0 + 0.0 * I; for (int ims = 1; ims <= lstpo; ims++) { int ms = ims - lspo; Loading @@ -251,37 +259,43 @@ void apcra( int ismp = ismpmm + msmp; int ismpe = ismp + nlem; double cgcs = cg1(lsmpml, mu, ls, ms); fc = (uimtl * uimtls) * (cgc * cgcs); ca11 = dconjg(am0m[is - 1][i - 1]); ca12 = dconjg(am0m[is - 1][ie - 1]); ca21 = dconjg(am0m[ise - 1][i - 1]); ca22 = dconjg(am0m[ise - 1][ie - 1]); a11 = am0m[ismp - 1][imp - 1]; a12 = am0m[ismp - 1][impe - 1]; a21 = am0m[ismpe - 1][imp - 1]; a22 = am0m[ismpe - 1][impe - 1]; dcomplex fc = (uimtl * uimtls) * (cgc * cgcs); dcomplex ca11 = dconjg(am0m[is - 1][i - 1]); dcomplex ca12 = dconjg(am0m[is - 1][ie - 1]); dcomplex ca21 = dconjg(am0m[ise - 1][i - 1]); dcomplex ca22 = dconjg(am0m[ise - 1][ie - 1]); dcomplex a11 = am0m[ismp - 1][imp - 1]; dcomplex a12 = am0m[ismp - 1][impe - 1]; dcomplex a21 = am0m[ismpe - 1][imp - 1]; dcomplex a22 = am0m[ismpe - 1][impe - 1]; double z11 = zpv[ls - 1][ilsmp - 1][0][0]; double z12 = zpv[ls - 1][ilsmp - 1][0][1]; double z21 = zpv[ls - 1][ilsmp - 1][1][0]; double z22 = zpv[ls - 1][ilsmp - 1][1][1]; vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1)] += ( vec_svs[svs_start] += ( ( ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22 ) * fc ); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 1] += ((ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22) * fc); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 2] += ((ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22) * fc); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 3] += ((ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22) * fc); vec_svs[svs_start + 1] += ( ( ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22 ) * fc ); vec_svs[svs_start + 2] += ( ( ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22 ) * fc ); vec_svs[svs_start + 3] += ( ( ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22 ) * fc ); } // ends ims loop } // ims loop } // ilsmp loop Loading @@ -291,8 +305,7 @@ void apcra( } // im54 loop } // ilmp loop } // l58 loop dcomplex sum1 = cc0; dcomplex sum2 = cc0; #pragma omp for reduction(+: sum1, sum2) for (int l68 = 1; l68 <= le; l68++) { //int lpo = l68 + 1; //int ltpo = l68 + lpo; Loading Loading @@ -328,6 +341,7 @@ void apcra( } // label 66, ends ilmp loop } // ilmp loop } // l68 loop } // OMP parallel region const double half_pi = acos(0.0); double cofs = half_pi * 2.0 / sqk; gaprm[0][0] = 0.0; Loading @@ -352,18 +366,7 @@ void apcra( gappm[2][0] = cc0; gappm[2][1] = cc0; } // Clean memory // for (int i = le - 1; i > -1; i--) { // for (int j = 2; j > -1; j--) { // delete[] svw[i][j]; // delete[] svs[i][j]; // } // delete[] svw[i]; // delete[] svs[i]; // } // delete[] svw; // delete[] svs; delete[] vec_svw; delete[] vec_svs; } Loading