Loading src/libnptm/clu_subs.cpp +23 −17 Original line number Diff line number Diff line Loading @@ -1152,21 +1152,23 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { * IER IS REPLACED BY 1 FOR SINGULARITY. */ double *v = new double[nddmst]; double *vi = new double[nddmst](); dcomplex *vec_am = am[0]; dcomplex ctemp, cfun; const dcomplex cc0 = 0.0 + 0.0 * I; ier = 0; int nminus = n - 1; #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { double sum = 0.0; #pragma omp parallel for reduction(+: vi[i - 1]) for (int64_t j = 1; j <= n; j++) { sum += ( vi[i - 1] += ( real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1]) + imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1]) ); } // j1319 loop v[i - 1] = 1.0 / sum; v[i - 1] = 1.0 / vi[i - 1]; } // i1309 loop delete[] vi; // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U. // REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4. // (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS Loading @@ -1177,8 +1179,8 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { int64_t l = k; double psqmax = 0.0; for (int64_t i = k; i <= n; i++) { cfun = cdtp(-vec_am[(i - 1) * n + k - 1], vec_am, i, 1, k, kminus, n); ctemp = -cfun; dcomplex cfun = cdtp(-vec_am[(i - 1) * n + k - 1], vec_am, i, 1, k, kminus, n); dcomplex ctemp = -cfun; vec_am[(i - 1) * n + k - 1] = ctemp; double psq = v[i - 1] * (real(ctemp) * real(ctemp) + imag(ctemp) * imag(ctemp)); if (psq > psqmax) { Loading @@ -1187,8 +1189,10 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { } } // i2029 loop if (l != k) { // Row swapping can be made parallel #pragma omp parallel for for (int64_t j = 1; j <= n; j++) { ctemp = vec_am[(k - 1) * n + j - 1]; dcomplex ctemp = vec_am[(k - 1) * n + j - 1]; vec_am[(k - 1) * n + j - 1] = vec_am[(l - 1) * n + j - 1]; vec_am[(l - 1) * n + j - 1] = ctemp; } // j2049 loop Loading @@ -1201,11 +1205,12 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { delete[] v; return; } ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; am[k - 1][k - 1] = ctemp; dcomplex ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; vec_am[(k - 1) * n + k - 1] = ctemp; if (kplus <= n) { #pragma omp parallel for for (int64_t j = kplus; j <= n; j++) { cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); dcomplex cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); vec_am[(k - 1) * n + j - 1] = -ctemp * cfun; } // j2059 loop } Loading @@ -1215,7 +1220,7 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { for (int64_t k = 1; k <= nminus; k++) { int64_t kplus = k + 1; for (int64_t i = kplus; i <= n; i++) { cfun = cdtp(cc0, vec_am, i, k, k, i - k, n); dcomplex cfun = cdtp(cc0, vec_am, i, k, k, i - k, n); vec_am[(i - 1) * n + k - 1] = -vec_am[(i - 1) * n + i - 1] * cfun; cfun = cdtp(vec_am[(k - 1) * n + i - 1], vec_am, k, kplus, i, i - k - 1, n); vec_am[(k - 1) * n + i - 1] = -cfun; Loading @@ -1225,11 +1230,11 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { for (int64_t k = 1; k <= n; k++) { for (int64_t i = 1; i <= n; i++) { if (i < k) { cfun = cdtp(cc0, vec_am, i, k, k, n - k + 1, n); dcomplex cfun = cdtp(cc0, vec_am, i, k, k, n - k + 1, n); vec_am[(i - 1) * n + k -1] = cfun; } else { cfun = cdtp(vec_am[(i - 1) * n + k - 1], vec_am, i, i + 1, k, n - i, n); dcomplex cfun = cdtp(vec_am[(i - 1) * n + k - 1], vec_am, i, i + 1, k, n - i, n); vec_am[(i - 1) * n + k - 1] = cfun; } } // i4119 loop Loading @@ -1240,8 +1245,9 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { int64_t k = n - l + 1; int64_t kcol = (int64_t)(v[k - 1]); if (kcol != k) { #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { ctemp = vec_am[(i - 1) * n + k - 1]; dcomplex ctemp = vec_am[(i - 1) * n + k - 1]; vec_am[(i - 1) * n + k - 1] = vec_am[(i - 1) * n + kcol - 1]; vec_am[(i - 1) * n + kcol - 1] = ctemp; } // i4319 loop Loading Loading
src/libnptm/clu_subs.cpp +23 −17 Original line number Diff line number Diff line Loading @@ -1152,21 +1152,23 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { * IER IS REPLACED BY 1 FOR SINGULARITY. */ double *v = new double[nddmst]; double *vi = new double[nddmst](); dcomplex *vec_am = am[0]; dcomplex ctemp, cfun; const dcomplex cc0 = 0.0 + 0.0 * I; ier = 0; int nminus = n - 1; #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { double sum = 0.0; #pragma omp parallel for reduction(+: vi[i - 1]) for (int64_t j = 1; j <= n; j++) { sum += ( vi[i - 1] += ( real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1]) + imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1]) ); } // j1319 loop v[i - 1] = 1.0 / sum; v[i - 1] = 1.0 / vi[i - 1]; } // i1309 loop delete[] vi; // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U. // REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4. // (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS Loading @@ -1177,8 +1179,8 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { int64_t l = k; double psqmax = 0.0; for (int64_t i = k; i <= n; i++) { cfun = cdtp(-vec_am[(i - 1) * n + k - 1], vec_am, i, 1, k, kminus, n); ctemp = -cfun; dcomplex cfun = cdtp(-vec_am[(i - 1) * n + k - 1], vec_am, i, 1, k, kminus, n); dcomplex ctemp = -cfun; vec_am[(i - 1) * n + k - 1] = ctemp; double psq = v[i - 1] * (real(ctemp) * real(ctemp) + imag(ctemp) * imag(ctemp)); if (psq > psqmax) { Loading @@ -1187,8 +1189,10 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { } } // i2029 loop if (l != k) { // Row swapping can be made parallel #pragma omp parallel for for (int64_t j = 1; j <= n; j++) { ctemp = vec_am[(k - 1) * n + j - 1]; dcomplex ctemp = vec_am[(k - 1) * n + j - 1]; vec_am[(k - 1) * n + j - 1] = vec_am[(l - 1) * n + j - 1]; vec_am[(l - 1) * n + j - 1] = ctemp; } // j2049 loop Loading @@ -1201,11 +1205,12 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { delete[] v; return; } ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; am[k - 1][k - 1] = ctemp; dcomplex ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; vec_am[(k - 1) * n + k - 1] = ctemp; if (kplus <= n) { #pragma omp parallel for for (int64_t j = kplus; j <= n; j++) { cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); dcomplex cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); vec_am[(k - 1) * n + j - 1] = -ctemp * cfun; } // j2059 loop } Loading @@ -1215,7 +1220,7 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { for (int64_t k = 1; k <= nminus; k++) { int64_t kplus = k + 1; for (int64_t i = kplus; i <= n; i++) { cfun = cdtp(cc0, vec_am, i, k, k, i - k, n); dcomplex cfun = cdtp(cc0, vec_am, i, k, k, i - k, n); vec_am[(i - 1) * n + k - 1] = -vec_am[(i - 1) * n + i - 1] * cfun; cfun = cdtp(vec_am[(k - 1) * n + i - 1], vec_am, k, kplus, i, i - k - 1, n); vec_am[(k - 1) * n + i - 1] = -cfun; Loading @@ -1225,11 +1230,11 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { for (int64_t k = 1; k <= n; k++) { for (int64_t i = 1; i <= n; i++) { if (i < k) { cfun = cdtp(cc0, vec_am, i, k, k, n - k + 1, n); dcomplex cfun = cdtp(cc0, vec_am, i, k, k, n - k + 1, n); vec_am[(i - 1) * n + k -1] = cfun; } else { cfun = cdtp(vec_am[(i - 1) * n + k - 1], vec_am, i, i + 1, k, n - i, n); dcomplex cfun = cdtp(vec_am[(i - 1) * n + k - 1], vec_am, i, i + 1, k, n - i, n); vec_am[(i - 1) * n + k - 1] = cfun; } } // i4119 loop Loading @@ -1240,8 +1245,9 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { int64_t k = n - l + 1; int64_t kcol = (int64_t)(v[k - 1]); if (kcol != k) { #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { ctemp = vec_am[(i - 1) * n + k - 1]; dcomplex ctemp = vec_am[(i - 1) * n + k - 1]; vec_am[(i - 1) * n + k - 1] = vec_am[(i - 1) * n + kcol - 1]; vec_am[(i - 1) * n + kcol - 1] = ctemp; } // i4319 loop Loading