Loading src/libnptm/clu_subs.cpp +244 −242 Original line number Diff line number Diff line Loading @@ -56,22 +56,21 @@ void apc( dcomplex *vec_ac, *vec_gap; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimmp, summ, sume, suem, suee, summp, sumep; dcomplex suemp, sueep; // dcomplex uimmp, summ, sume, suem, suee, summp, sumep; // dcomplex suemp, sueep; double cof = 1.0 / sqk; double cimu = cof / sqrt(2.0); int nlem = le * (le + 2); const int nlemt = nlem + nlem; const int nlemf = 2 * nlemt; vec_ac = new dcomplex[nlemf](); vec_ac = new dcomplex[2 * nlemt](); vec_gap = new dcomplex[6](); #pragma omp parallel for reduction(+: vec_ac[0:nlemf]) for (np_int ij = 0; ij < nlemt * nlemt; ij++) { np_int j45 = ij / nlemt; np_int i45 = ij % nlemt; #pragma omp parallel for simd for (int j45 = 0; j45 < nlemt; j45++) { for (int i45 = 0; i45 < nlemt; i45++) { vec_ac[2 * j45] += (am0m[j45][i45] * w[i45][0]); vec_ac[2 * j45 + 1] += (am0m[j45][i45] * w[i45][1]); } // ij loop } //i45 loop } //j45 loop for (int imu90 = 1; imu90 <=3; imu90++) { int mu = imu90 - 2; gapp[imu90 - 1][0] = cc0; Loading @@ -84,7 +83,7 @@ void apc( if ((l80 == 1 && ilmp == 1) || (l80 == le && ilmp == 3)) continue; // ilmp loop int lmpml = ilmp - 2; int lmp = l80 + lmpml; uimmp = (-1.0 * lmpml) * uim; dcomplex uimmp = (-1.0 * lmpml) * uim; int impmmmp = lmp * (lmp + 1); for (int im70 = 1; im70 <= ltpo; im70++) { int m = im70 - lpo; Loading @@ -96,17 +95,18 @@ void apc( int imp = impmmmp + mmp; int impe = imp + nlem; double cgc = cg1(lmpml, mu, l80, m); int jpo = 1; for (int ipo = 0; ipo < 2; ipo++) { if (ipo == 1) jpo = 0; summ = dconjg(vec_ac[2 * (i - 1) + ipo]) * vec_ac[2 * (imp - 1) + ipo]; sume = dconjg(vec_ac[2 * (i - 1) + ipo]) * vec_ac[2 * (impe - 1) + ipo]; suem = dconjg(vec_ac[2 * (ie - 1) + ipo]) * vec_ac[2 * (imp - 1) + ipo]; suee = dconjg(vec_ac[2 * (ie - 1) + ipo]) * vec_ac[2 * (impe - 1) + ipo]; summp = dconjg(vec_ac[2 * (i - 1) + jpo]) * vec_ac[2 * (imp - 1) + ipo]; sumep = dconjg(vec_ac[2 * (i - 1) + jpo]) * vec_ac[2 * (impe - 1) + ipo]; suemp = dconjg(vec_ac[2 * (ie - 1) + jpo]) * vec_ac[2 * (imp - 1) + ipo]; sueep = dconjg(vec_ac[2 * (ie - 1) + jpo]) * vec_ac[2 * (impe - 1) +ipo]; int jpo = 2; for (int ipo = 1; ipo <= 2; ipo++) { dcomplex summ, sume, suem, suee, summp, sumep, suemp, sueep; if (ipo == 2) jpo = 1; summ = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sume = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suem = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; suee = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; summp = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sumep = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suemp = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sueep = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; if (lmpml != 0) { summ *= uimmp; sume *= uimmp; Loading @@ -118,7 +118,7 @@ void apc( sueep *= uimmp; } // label 55 vec_gap[2 * (imu90 - 1) + ipo] += ( vec_gap[2 * (imu90 - 1) + ipo - 1] += ( ( summ * zpv[l80 - 1][ilmp - 1][0][0] + sume * zpv[l80 - 1][ilmp - 1][0][1] Loading @@ -126,7 +126,7 @@ void apc( + suee * zpv[l80 - 1][ilmp - 1][1][1] ) * cgc ); gapp[imu90 - 1][ipo] += ( gapp[imu90 - 1][ipo - 1] += ( ( summp * zpv[l80 - 1][ilmp - 1][0][0] + sumep * zpv[l80 - 1][ilmp - 1][0][1] Loading @@ -140,19 +140,20 @@ void apc( } // ilmp loop } // l80 loop } // imu90 loop for (int ipo95 = 0; ipo95 < 2; ipo95++) { sume = vec_gap[ipo95] * cimu; suee = vec_gap[2 + ipo95] * cof; suem = vec_gap[4 + ipo95] * cimu; gapr[0][ipo95] = real(sume - suem); gapr[1][ipo95] = real((sume + suem) * uim); gapr[2][ipo95] = real(suee); sumep = gapp[0][ipo95] * cimu; sueep = gapp[1][ipo95] * cof; suemp = gapp[2][ipo95] * cimu; gapp[0][ipo95] = sumep - suemp; gapp[1][ipo95] = (sumep + suemp) * uim; gapp[2][ipo95] = sueep; for (int ipo95 = 1; ipo95 <= 2; ipo95++) { dcomplex sume, suee, suem, sumep, sueep, suemp; sume = vec_gap[ipo95 - 1] * cimu; suee = vec_gap[2 + ipo95 - 1] * cof; suem = vec_gap[4 + ipo95 - 1] * cimu; gapr[0][ipo95 - 1] = real(sume - suem); gapr[1][ipo95 - 1] = real((sume + suem) * uim); gapr[2][ipo95 - 1] = real(suee); sumep = gapp[0][ipo95 - 1] * cimu; sueep = gapp[1][ipo95 - 1] * cof; suemp = gapp[2][ipo95 - 1] * cimu; gapp[0][ipo95 - 1] = sumep - suemp; gapp[1][ipo95 - 1] = (sumep + suemp) * uim; gapp[2][ipo95 - 1] = sueep; } // ipo95 loop // Clean memory delete[] vec_ac; Loading @@ -165,23 +166,29 @@ void apcra( ) { const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; // dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; // dcomplex a11, a12, a21, a22, fc; double *vec_svw = new double[le * 12](); dcomplex *vec_svs = new dcomplex[le * 12](); dcomplex sum1 = cc0; dcomplex sum2 = cc0; dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; dcomplex a11, a12, a21, a22, sum1, sum2, fc; double ****svw = new double***[le]; dcomplex ****svs = new dcomplex***[le]; for (int i = 0; i < le; i++) { svw[i] = new double**[3]; svs[i] = new dcomplex**[3]; for (int j = 0; j < 3; j++) { svw[i][j] = new double*[2]; svs[i][j] = new dcomplex*[2]; for (int k = 0; k < 2; k++) { svw[i][j][k] = new double[2](); svs[i][j][k] = new dcomplex[2](); } } } int nlem = le * (le + 2); #pragma omp parallel { #pragma omp for for (int l28 = 1; l28 <= le; l28++) { int lpo = l28 + 1; int ltpo = lpo + l28; double fl = sqrt(1.0 * ltpo); for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l28 == 1 && ilmp == 1) || (l28 == le && ilmp == 3)) continue; // ilmp loop const int svw_start = 12 * (l28 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l28 + lmpml; double flmp = sqrt(1.0 * (lmp + lmp + 1)); Loading @@ -191,40 +198,38 @@ void apcra( if (inpol == 0) { double cgs = cgmpo + cgmmo; double cgd = cgmpo - cgmmo; vec_svw[svw_start] = cgs; vec_svw[svw_start + 1] = cgd; vec_svw[svw_start + 2] = cgd; vec_svw[svw_start + 3] = cgs; svw[l28 - 1][ilmp - 1][0][0] = cgs; svw[l28 - 1][ilmp - 1][0][1] = cgd; svw[l28 - 1][ilmp - 1][1][0] = cgd; svw[l28 - 1][ilmp - 1][1][1] = cgs; } else { // label 22 vec_svw[svw_start] = cgmpo; vec_svw[svw_start + 2] = cgmpo; vec_svw[svw_start + 1] = -cgmmo; vec_svw[svw_start + 3] = cgmmo; svw[l28 - 1][ilmp - 1][0][0] = cgmpo; svw[l28 - 1][ilmp - 1][1][0] = cgmpo; svw[l28 - 1][ilmp - 1][0][1] = -cgmmo; svw[l28 - 1][ilmp - 1][1][1] = cgmmo; } // label 26 } // ilmp loop } // l28 parallel loop // for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted // for (int ilmp = 1; ilmp <= 3; ilmp++) { // for (int ipa = 1; ipa <= 2; ipa++) { // for (int ipamp = 1; ipamp <= 2; ipamp++) { // vec_svs[12 * (l30 - 1) + 4 * (ilmp - 1) + 2 * (ipa - 1) + ipamp - 1] = cc0; // } // } // ipa loop // } // ilmp loop // } // l30 loop #pragma omp for reduction(+: vec_svs[0:12*le]) } // l28 loop for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted for (int ilmp = 1; ilmp <= 3; ilmp++) { for (int ipa = 1; ipa <= 2; ipa++) { for (int ipamp = 1; ipamp <= 2; ipamp++) { svs[l30 - 1][ilmp - 1][ipa - 1][ipamp - 1] = cc0; } } // ipa loop } // ilmp loop } // l30 loop for (int l58 = 1; l58 <= le; l58 ++) { int lpo = l58 + 1; int ltpo = l58 + lpo; int imm = l58 * lpo; for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l58 == 1 && ilmp == 1) || (l58 == le && ilmp == 3)) continue; // ilmp loop const int svs_start = 12 * (l58 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l58 + lmpml; int impmm = lmp * (lmp + 1); dcomplex uimtl = uim * (1.0 * lmpml); uimtl = uim * (1.0 * lmpml); if (lmpml == 0) uimtl = 1.0 + 0.0 * I; for (int im54 = 1; im54 <= ltpo; im54++) { int m = im54 - lpo; Loading @@ -247,7 +252,7 @@ void apcra( int lsmpml = ilsmp - 2; int lsmp = ls + lsmpml; int ismpmm = lsmp * (lsmp + 1); dcomplex uimtls = -uim * (1.0 * lsmpml); uimtls = -uim * (1.0 * lsmpml); if (lsmpml == 0) uimtls = 1.0 + 0.0 * I; for (int ims = 1; ims <= lstpo; ims++) { int ms = ims - lspo; Loading @@ -259,43 +264,35 @@ void apcra( int ismp = ismpmm + msmp; int ismpe = ismp + nlem; double cgcs = cg1(lsmpml, mu, ls, ms); dcomplex fc = (uimtl * uimtls) * (cgc * cgcs); dcomplex ca11 = dconjg(am0m[is - 1][i - 1]); dcomplex ca12 = dconjg(am0m[is - 1][ie - 1]); dcomplex ca21 = dconjg(am0m[ise - 1][i - 1]); dcomplex ca22 = dconjg(am0m[ise - 1][ie - 1]); dcomplex a11 = am0m[ismp - 1][imp - 1]; dcomplex a12 = am0m[ismp - 1][impe - 1]; dcomplex a21 = am0m[ismpe - 1][imp - 1]; dcomplex a22 = am0m[ismpe - 1][impe - 1]; fc = (uimtl * uimtls) * (cgc * cgcs); ca11 = dconjg(am0m[is - 1][i - 1]); ca12 = dconjg(am0m[is - 1][ie - 1]); ca21 = dconjg(am0m[ise - 1][i - 1]); ca22 = dconjg(am0m[ise - 1][ie - 1]); a11 = am0m[ismp - 1][imp - 1]; a12 = am0m[ismp - 1][impe - 1]; a21 = am0m[ismpe - 1][imp - 1]; a22 = am0m[ismpe - 1][impe - 1]; double z11 = zpv[ls - 1][ilsmp - 1][0][0]; double z12 = zpv[ls - 1][ilsmp - 1][0][1]; double z21 = zpv[ls - 1][ilsmp - 1][1][0]; double z22 = zpv[ls - 1][ilsmp - 1][1][1]; vec_svs[svs_start] += ( ( ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22 ) * fc ); vec_svs[svs_start + 1] += ( ( ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22 ) * fc ); vec_svs[svs_start + 2] += ( ( ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22 ) * fc ); vec_svs[svs_start + 3] += ( ( ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22 ) * fc ); svs[l58 - 1][ilmp - 1][0][0] += ((ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][0][1] += ((ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22) * fc); svs[l58 - 1][ilmp - 1][1][0] += ((ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][1][1] += ((ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22) * fc); } // ends ims loop } // ims loop } // ilsmp loop Loading @@ -305,7 +302,8 @@ void apcra( } // im54 loop } // ilmp loop } // l58 loop #pragma omp for reduction(+: sum1, sum2) sum1 = cc0; sum2 = cc0; for (int l68 = 1; l68 <= le; l68++) { //int lpo = l68 + 1; //int ltpo = l68 + lpo; Loading @@ -314,34 +312,33 @@ void apcra( if ((l68 == 1 && ilmp == 1) || (l68 == le && ilmp == 3)) continue; // ilmp loop if (inpol == 0) { sum1 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][1] ); sum2 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][1] ); } else { // label 62 sum1 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][1] ); sum2 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][1] ); } // label 66, ends ilmp loop } // ilmp loop } // l68 loop } // OMP parallel region const double half_pi = acos(0.0); double cofs = half_pi * 2.0 / sqk; gaprm[0][0] = 0.0; Loading @@ -366,14 +363,24 @@ void apcra( gappm[2][0] = cc0; gappm[2][1] = cc0; } // Clean memory delete[] vec_svw; delete[] vec_svs; for (int i = le - 1; i > -1; i--) { for (int j = 2; j > -1; j--) { for (int k = 1; k > -1; k--) { delete[] svw[i][j][k]; delete[] svs[i][j][k]; } delete[] svw[i][j]; delete[] svs[i][j]; } delete[] svw[i]; delete[] svs[i]; } delete[] svw; delete[] svs; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int istep) { dcomplex result = z; if (nj > 0) { Loading @@ -384,9 +391,6 @@ dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif // #ifdef USE_TARGET_OFFLOAD // #pragma omp begin declare target device_type(any) Loading Loading @@ -1134,7 +1138,7 @@ void hjv( delete[] rfn; } int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { /* NDDMST FIRST DIMENSION OF AM AS DECLARED IN DIMENSION * STATEMENT. * N NUMBER OF ROWS IN AM. Loading @@ -1142,24 +1146,22 @@ int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { */ double *v = new double[nddmst]; double *vi = new double[nddmst](); dcomplex *vec_am = am[0]; const dcomplex cc0 = 0.0 + 0.0 * I; int ier = 0; ier = 0; int nminus = n - 1; const np_int nn = n * n; #pragma omp parallel { #pragma omp for reduction(+: vi[0:nddmst]) for (np_int ij = 0; ij < nn; ij++) { np_int i = ij / n; np_int j = ij % n; vi[i] += ( real(vec_am[i * n + j]) * real(vec_am[i * n + j]) + imag(vec_am[i * n + j]) * imag(vec_am[i * n + j]) #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { #pragma omp parallel for reduction(+: vi[i - 1]) for (int64_t j = 1; j <= n; j++) { vi[i - 1] += ( real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1]) + imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1]) ); } #pragma omp for for (np_int i = 0; i < n; i++) v[i] = 1.0 / vi[i]; } // OMP parallel region } // j1319 loop v[i - 1] = 1.0 / vi[i - 1]; } // i1309 loop delete[] vi; // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U. // REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4. // (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS Loading Loading @@ -1193,11 +1195,13 @@ int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { v[k - 1] = 1.0 * l; if (psqmax == 0.0) { ier = 1; // return ier; delete[] v; return; } dcomplex ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; vec_am[(k - 1) * n + k - 1] = ctemp; if (kplus <= n) { #pragma omp parallel for for (int64_t j = kplus; j <= n; j++) { dcomplex cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); vec_am[(k - 1) * n + j - 1] = -ctemp * cfun; Loading Loading @@ -1242,9 +1246,7 @@ int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { } // i4319 loop } } // l4309 loop delete[] vi; delete[] v; return ier; } void mextc(double vk, double exri, dcomplex **fsac, double **cextlr, double **cext) { Loading Loading
src/libnptm/clu_subs.cpp +244 −242 Original line number Diff line number Diff line Loading @@ -56,22 +56,21 @@ void apc( dcomplex *vec_ac, *vec_gap; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimmp, summ, sume, suem, suee, summp, sumep; dcomplex suemp, sueep; // dcomplex uimmp, summ, sume, suem, suee, summp, sumep; // dcomplex suemp, sueep; double cof = 1.0 / sqk; double cimu = cof / sqrt(2.0); int nlem = le * (le + 2); const int nlemt = nlem + nlem; const int nlemf = 2 * nlemt; vec_ac = new dcomplex[nlemf](); vec_ac = new dcomplex[2 * nlemt](); vec_gap = new dcomplex[6](); #pragma omp parallel for reduction(+: vec_ac[0:nlemf]) for (np_int ij = 0; ij < nlemt * nlemt; ij++) { np_int j45 = ij / nlemt; np_int i45 = ij % nlemt; #pragma omp parallel for simd for (int j45 = 0; j45 < nlemt; j45++) { for (int i45 = 0; i45 < nlemt; i45++) { vec_ac[2 * j45] += (am0m[j45][i45] * w[i45][0]); vec_ac[2 * j45 + 1] += (am0m[j45][i45] * w[i45][1]); } // ij loop } //i45 loop } //j45 loop for (int imu90 = 1; imu90 <=3; imu90++) { int mu = imu90 - 2; gapp[imu90 - 1][0] = cc0; Loading @@ -84,7 +83,7 @@ void apc( if ((l80 == 1 && ilmp == 1) || (l80 == le && ilmp == 3)) continue; // ilmp loop int lmpml = ilmp - 2; int lmp = l80 + lmpml; uimmp = (-1.0 * lmpml) * uim; dcomplex uimmp = (-1.0 * lmpml) * uim; int impmmmp = lmp * (lmp + 1); for (int im70 = 1; im70 <= ltpo; im70++) { int m = im70 - lpo; Loading @@ -96,17 +95,18 @@ void apc( int imp = impmmmp + mmp; int impe = imp + nlem; double cgc = cg1(lmpml, mu, l80, m); int jpo = 1; for (int ipo = 0; ipo < 2; ipo++) { if (ipo == 1) jpo = 0; summ = dconjg(vec_ac[2 * (i - 1) + ipo]) * vec_ac[2 * (imp - 1) + ipo]; sume = dconjg(vec_ac[2 * (i - 1) + ipo]) * vec_ac[2 * (impe - 1) + ipo]; suem = dconjg(vec_ac[2 * (ie - 1) + ipo]) * vec_ac[2 * (imp - 1) + ipo]; suee = dconjg(vec_ac[2 * (ie - 1) + ipo]) * vec_ac[2 * (impe - 1) + ipo]; summp = dconjg(vec_ac[2 * (i - 1) + jpo]) * vec_ac[2 * (imp - 1) + ipo]; sumep = dconjg(vec_ac[2 * (i - 1) + jpo]) * vec_ac[2 * (impe - 1) + ipo]; suemp = dconjg(vec_ac[2 * (ie - 1) + jpo]) * vec_ac[2 * (imp - 1) + ipo]; sueep = dconjg(vec_ac[2 * (ie - 1) + jpo]) * vec_ac[2 * (impe - 1) +ipo]; int jpo = 2; for (int ipo = 1; ipo <= 2; ipo++) { dcomplex summ, sume, suem, suee, summp, sumep, suemp, sueep; if (ipo == 2) jpo = 1; summ = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sume = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suem = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; suee = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; summp = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sumep = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; suemp = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1]; sueep = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1]; if (lmpml != 0) { summ *= uimmp; sume *= uimmp; Loading @@ -118,7 +118,7 @@ void apc( sueep *= uimmp; } // label 55 vec_gap[2 * (imu90 - 1) + ipo] += ( vec_gap[2 * (imu90 - 1) + ipo - 1] += ( ( summ * zpv[l80 - 1][ilmp - 1][0][0] + sume * zpv[l80 - 1][ilmp - 1][0][1] Loading @@ -126,7 +126,7 @@ void apc( + suee * zpv[l80 - 1][ilmp - 1][1][1] ) * cgc ); gapp[imu90 - 1][ipo] += ( gapp[imu90 - 1][ipo - 1] += ( ( summp * zpv[l80 - 1][ilmp - 1][0][0] + sumep * zpv[l80 - 1][ilmp - 1][0][1] Loading @@ -140,19 +140,20 @@ void apc( } // ilmp loop } // l80 loop } // imu90 loop for (int ipo95 = 0; ipo95 < 2; ipo95++) { sume = vec_gap[ipo95] * cimu; suee = vec_gap[2 + ipo95] * cof; suem = vec_gap[4 + ipo95] * cimu; gapr[0][ipo95] = real(sume - suem); gapr[1][ipo95] = real((sume + suem) * uim); gapr[2][ipo95] = real(suee); sumep = gapp[0][ipo95] * cimu; sueep = gapp[1][ipo95] * cof; suemp = gapp[2][ipo95] * cimu; gapp[0][ipo95] = sumep - suemp; gapp[1][ipo95] = (sumep + suemp) * uim; gapp[2][ipo95] = sueep; for (int ipo95 = 1; ipo95 <= 2; ipo95++) { dcomplex sume, suee, suem, sumep, sueep, suemp; sume = vec_gap[ipo95 - 1] * cimu; suee = vec_gap[2 + ipo95 - 1] * cof; suem = vec_gap[4 + ipo95 - 1] * cimu; gapr[0][ipo95 - 1] = real(sume - suem); gapr[1][ipo95 - 1] = real((sume + suem) * uim); gapr[2][ipo95 - 1] = real(suee); sumep = gapp[0][ipo95 - 1] * cimu; sueep = gapp[1][ipo95 - 1] * cof; suemp = gapp[2][ipo95 - 1] * cimu; gapp[0][ipo95 - 1] = sumep - suemp; gapp[1][ipo95 - 1] = (sumep + suemp) * uim; gapp[2][ipo95 - 1] = sueep; } // ipo95 loop // Clean memory delete[] vec_ac; Loading @@ -165,23 +166,29 @@ void apcra( ) { const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; // dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; // dcomplex a11, a12, a21, a22, fc; double *vec_svw = new double[le * 12](); dcomplex *vec_svs = new dcomplex[le * 12](); dcomplex sum1 = cc0; dcomplex sum2 = cc0; dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; dcomplex a11, a12, a21, a22, sum1, sum2, fc; double ****svw = new double***[le]; dcomplex ****svs = new dcomplex***[le]; for (int i = 0; i < le; i++) { svw[i] = new double**[3]; svs[i] = new dcomplex**[3]; for (int j = 0; j < 3; j++) { svw[i][j] = new double*[2]; svs[i][j] = new dcomplex*[2]; for (int k = 0; k < 2; k++) { svw[i][j][k] = new double[2](); svs[i][j][k] = new dcomplex[2](); } } } int nlem = le * (le + 2); #pragma omp parallel { #pragma omp for for (int l28 = 1; l28 <= le; l28++) { int lpo = l28 + 1; int ltpo = lpo + l28; double fl = sqrt(1.0 * ltpo); for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l28 == 1 && ilmp == 1) || (l28 == le && ilmp == 3)) continue; // ilmp loop const int svw_start = 12 * (l28 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l28 + lmpml; double flmp = sqrt(1.0 * (lmp + lmp + 1)); Loading @@ -191,40 +198,38 @@ void apcra( if (inpol == 0) { double cgs = cgmpo + cgmmo; double cgd = cgmpo - cgmmo; vec_svw[svw_start] = cgs; vec_svw[svw_start + 1] = cgd; vec_svw[svw_start + 2] = cgd; vec_svw[svw_start + 3] = cgs; svw[l28 - 1][ilmp - 1][0][0] = cgs; svw[l28 - 1][ilmp - 1][0][1] = cgd; svw[l28 - 1][ilmp - 1][1][0] = cgd; svw[l28 - 1][ilmp - 1][1][1] = cgs; } else { // label 22 vec_svw[svw_start] = cgmpo; vec_svw[svw_start + 2] = cgmpo; vec_svw[svw_start + 1] = -cgmmo; vec_svw[svw_start + 3] = cgmmo; svw[l28 - 1][ilmp - 1][0][0] = cgmpo; svw[l28 - 1][ilmp - 1][1][0] = cgmpo; svw[l28 - 1][ilmp - 1][0][1] = -cgmmo; svw[l28 - 1][ilmp - 1][1][1] = cgmmo; } // label 26 } // ilmp loop } // l28 parallel loop // for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted // for (int ilmp = 1; ilmp <= 3; ilmp++) { // for (int ipa = 1; ipa <= 2; ipa++) { // for (int ipamp = 1; ipamp <= 2; ipamp++) { // vec_svs[12 * (l30 - 1) + 4 * (ilmp - 1) + 2 * (ipa - 1) + ipamp - 1] = cc0; // } // } // ipa loop // } // ilmp loop // } // l30 loop #pragma omp for reduction(+: vec_svs[0:12*le]) } // l28 loop for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted for (int ilmp = 1; ilmp <= 3; ilmp++) { for (int ipa = 1; ipa <= 2; ipa++) { for (int ipamp = 1; ipamp <= 2; ipamp++) { svs[l30 - 1][ilmp - 1][ipa - 1][ipamp - 1] = cc0; } } // ipa loop } // ilmp loop } // l30 loop for (int l58 = 1; l58 <= le; l58 ++) { int lpo = l58 + 1; int ltpo = l58 + lpo; int imm = l58 * lpo; for (int ilmp = 1; ilmp <= 3; ilmp++) { if ((l58 == 1 && ilmp == 1) || (l58 == le && ilmp == 3)) continue; // ilmp loop const int svs_start = 12 * (l58 - 1) + 4 * (ilmp - 1); int lmpml = ilmp - 2; int lmp = l58 + lmpml; int impmm = lmp * (lmp + 1); dcomplex uimtl = uim * (1.0 * lmpml); uimtl = uim * (1.0 * lmpml); if (lmpml == 0) uimtl = 1.0 + 0.0 * I; for (int im54 = 1; im54 <= ltpo; im54++) { int m = im54 - lpo; Loading @@ -247,7 +252,7 @@ void apcra( int lsmpml = ilsmp - 2; int lsmp = ls + lsmpml; int ismpmm = lsmp * (lsmp + 1); dcomplex uimtls = -uim * (1.0 * lsmpml); uimtls = -uim * (1.0 * lsmpml); if (lsmpml == 0) uimtls = 1.0 + 0.0 * I; for (int ims = 1; ims <= lstpo; ims++) { int ms = ims - lspo; Loading @@ -259,43 +264,35 @@ void apcra( int ismp = ismpmm + msmp; int ismpe = ismp + nlem; double cgcs = cg1(lsmpml, mu, ls, ms); dcomplex fc = (uimtl * uimtls) * (cgc * cgcs); dcomplex ca11 = dconjg(am0m[is - 1][i - 1]); dcomplex ca12 = dconjg(am0m[is - 1][ie - 1]); dcomplex ca21 = dconjg(am0m[ise - 1][i - 1]); dcomplex ca22 = dconjg(am0m[ise - 1][ie - 1]); dcomplex a11 = am0m[ismp - 1][imp - 1]; dcomplex a12 = am0m[ismp - 1][impe - 1]; dcomplex a21 = am0m[ismpe - 1][imp - 1]; dcomplex a22 = am0m[ismpe - 1][impe - 1]; fc = (uimtl * uimtls) * (cgc * cgcs); ca11 = dconjg(am0m[is - 1][i - 1]); ca12 = dconjg(am0m[is - 1][ie - 1]); ca21 = dconjg(am0m[ise - 1][i - 1]); ca22 = dconjg(am0m[ise - 1][ie - 1]); a11 = am0m[ismp - 1][imp - 1]; a12 = am0m[ismp - 1][impe - 1]; a21 = am0m[ismpe - 1][imp - 1]; a22 = am0m[ismpe - 1][impe - 1]; double z11 = zpv[ls - 1][ilsmp - 1][0][0]; double z12 = zpv[ls - 1][ilsmp - 1][0][1]; double z21 = zpv[ls - 1][ilsmp - 1][1][0]; double z22 = zpv[ls - 1][ilsmp - 1][1][1]; vec_svs[svs_start] += ( ( ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22 ) * fc ); vec_svs[svs_start + 1] += ( ( ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22 ) * fc ); vec_svs[svs_start + 2] += ( ( ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22 ) * fc ); vec_svs[svs_start + 3] += ( ( ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22 ) * fc ); svs[l58 - 1][ilmp - 1][0][0] += ((ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][0][1] += ((ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22) * fc); svs[l58 - 1][ilmp - 1][1][0] += ((ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][1][1] += ((ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22) * fc); } // ends ims loop } // ims loop } // ilsmp loop Loading @@ -305,7 +302,8 @@ void apcra( } // im54 loop } // ilmp loop } // l58 loop #pragma omp for reduction(+: sum1, sum2) sum1 = cc0; sum2 = cc0; for (int l68 = 1; l68 <= le; l68++) { //int lpo = l68 + 1; //int ltpo = l68 + lpo; Loading @@ -314,34 +312,33 @@ void apcra( if ((l68 == 1 && ilmp == 1) || (l68 == le && ilmp == 3)) continue; // ilmp loop if (inpol == 0) { sum1 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][1] ); sum2 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][1] ); } else { // label 62 sum1 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][1] ); sum2 += ( vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][1] ); } // label 66, ends ilmp loop } // ilmp loop } // l68 loop } // OMP parallel region const double half_pi = acos(0.0); double cofs = half_pi * 2.0 / sqk; gaprm[0][0] = 0.0; Loading @@ -366,14 +363,24 @@ void apcra( gappm[2][0] = cc0; gappm[2][1] = cc0; } // Clean memory delete[] vec_svw; delete[] vec_svs; for (int i = le - 1; i > -1; i--) { for (int j = 2; j > -1; j--) { for (int k = 1; k > -1; k--) { delete[] svw[i][j][k]; delete[] svs[i][j][k]; } delete[] svw[i][j]; delete[] svs[i][j]; } delete[] svw[i]; delete[] svs[i]; } delete[] svw; delete[] svs; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int istep) { dcomplex result = z; if (nj > 0) { Loading @@ -384,9 +391,6 @@ dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif // #ifdef USE_TARGET_OFFLOAD // #pragma omp begin declare target device_type(any) Loading Loading @@ -1134,7 +1138,7 @@ void hjv( delete[] rfn; } int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { /* NDDMST FIRST DIMENSION OF AM AS DECLARED IN DIMENSION * STATEMENT. * N NUMBER OF ROWS IN AM. Loading @@ -1142,24 +1146,22 @@ int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { */ double *v = new double[nddmst]; double *vi = new double[nddmst](); dcomplex *vec_am = am[0]; const dcomplex cc0 = 0.0 + 0.0 * I; int ier = 0; ier = 0; int nminus = n - 1; const np_int nn = n * n; #pragma omp parallel { #pragma omp for reduction(+: vi[0:nddmst]) for (np_int ij = 0; ij < nn; ij++) { np_int i = ij / n; np_int j = ij % n; vi[i] += ( real(vec_am[i * n + j]) * real(vec_am[i * n + j]) + imag(vec_am[i * n + j]) * imag(vec_am[i * n + j]) #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { #pragma omp parallel for reduction(+: vi[i - 1]) for (int64_t j = 1; j <= n; j++) { vi[i - 1] += ( real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1]) + imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1]) ); } #pragma omp for for (np_int i = 0; i < n; i++) v[i] = 1.0 / vi[i]; } // OMP parallel region } // j1319 loop v[i - 1] = 1.0 / vi[i - 1]; } // i1309 loop delete[] vi; // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U. // REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4. // (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS Loading Loading @@ -1193,11 +1195,13 @@ int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { v[k - 1] = 1.0 * l; if (psqmax == 0.0) { ier = 1; // return ier; delete[] v; return; } dcomplex ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; vec_am[(k - 1) * n + k - 1] = ctemp; if (kplus <= n) { #pragma omp parallel for for (int64_t j = kplus; j <= n; j++) { dcomplex cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); vec_am[(k - 1) * n + j - 1] = -ctemp * cfun; Loading Loading @@ -1242,9 +1246,7 @@ int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { } // i4319 loop } } // l4309 loop delete[] vi; delete[] v; return ier; } void mextc(double vk, double exri, dcomplex **fsac, double **cextlr, double **cext) { Loading