Loading src/libnptm/clu_subs.cpp +25 −17 Original line number Diff line number Diff line Loading @@ -388,6 +388,9 @@ void apcra( delete[] svs; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int istep) { dcomplex result = z; if (nj > 0) { Loading @@ -398,6 +401,9 @@ dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif // #ifdef USE_TARGET_OFFLOAD // #pragma omp begin declare target device_type(any) Loading Loading @@ -1145,7 +1151,7 @@ void hjv( delete[] rfn; } void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { /* NDDMST FIRST DIMENSION OF AM AS DECLARED IN DIMENSION * STATEMENT. * N NUMBER OF ROWS IN AM. Loading @@ -1153,22 +1159,24 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { */ double *v = new double[nddmst]; double *vi = new double[nddmst](); dcomplex *vec_am = am[0]; const dcomplex cc0 = 0.0 + 0.0 * I; ier = 0; int ier = 0; int nminus = n - 1; #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { #pragma omp parallel for reduction(+: vi[i - 1]) for (int64_t j = 1; j <= n; j++) { vi[i - 1] += ( real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1]) + imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1]) const np_int nn = n * n; #pragma omp parallel { #pragma omp for reduction(+: vi[0:nddmst]) for (np_int ij = 0; ij < nn; ij++) { np_int i = ij / n; np_int j = ij % n; vi[i] += ( real(vec_am[i * n + j]) * real(vec_am[i * n + j]) + imag(vec_am[i * n + j]) * imag(vec_am[i * n + j]) ); } // j1319 loop v[i - 1] = 1.0 / vi[i - 1]; } // i1309 loop delete[] vi; } #pragma omp for for (np_int i = 0; i < n; i++) v[i] = 1.0 / vi[i]; } // OMP parallel region // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U. // REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4. // (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS Loading Loading @@ -1202,13 +1210,11 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { v[k - 1] = 1.0 * l; if (psqmax == 0.0) { ier = 1; delete[] v; return; // return ier; } dcomplex ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; vec_am[(k - 1) * n + k - 1] = ctemp; if (kplus <= n) { #pragma omp parallel for for (int64_t j = kplus; j <= n; j++) { dcomplex cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); vec_am[(k - 1) * n + j - 1] = -ctemp * cfun; Loading Loading @@ -1253,7 +1259,9 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { } // i4319 loop } } // l4309 loop delete[] vi; delete[] v; return ier; } void mextc(double vk, double exri, dcomplex **fsac, double **cextlr, double **cext) { Loading Loading
src/libnptm/clu_subs.cpp +25 −17 Original line number Diff line number Diff line Loading @@ -388,6 +388,9 @@ void apcra( delete[] svs; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int istep) { dcomplex result = z; if (nj > 0) { Loading @@ -398,6 +401,9 @@ dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif // #ifdef USE_TARGET_OFFLOAD // #pragma omp begin declare target device_type(any) Loading Loading @@ -1145,7 +1151,7 @@ void hjv( delete[] rfn; } void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { int lucin(dcomplex *vec_am, const np_int nddmst, np_int n) { /* NDDMST FIRST DIMENSION OF AM AS DECLARED IN DIMENSION * STATEMENT. * N NUMBER OF ROWS IN AM. Loading @@ -1153,22 +1159,24 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { */ double *v = new double[nddmst]; double *vi = new double[nddmst](); dcomplex *vec_am = am[0]; const dcomplex cc0 = 0.0 + 0.0 * I; ier = 0; int ier = 0; int nminus = n - 1; #pragma omp parallel for for (int64_t i = 1; i <= n; i++) { #pragma omp parallel for reduction(+: vi[i - 1]) for (int64_t j = 1; j <= n; j++) { vi[i - 1] += ( real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1]) + imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1]) const np_int nn = n * n; #pragma omp parallel { #pragma omp for reduction(+: vi[0:nddmst]) for (np_int ij = 0; ij < nn; ij++) { np_int i = ij / n; np_int j = ij % n; vi[i] += ( real(vec_am[i * n + j]) * real(vec_am[i * n + j]) + imag(vec_am[i * n + j]) * imag(vec_am[i * n + j]) ); } // j1319 loop v[i - 1] = 1.0 / vi[i - 1]; } // i1309 loop delete[] vi; } #pragma omp for for (np_int i = 0; i < n; i++) v[i] = 1.0 / vi[i]; } // OMP parallel region // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U. // REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4. // (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS Loading Loading @@ -1202,13 +1210,11 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { v[k - 1] = 1.0 * l; if (psqmax == 0.0) { ier = 1; delete[] v; return; // return ier; } dcomplex ctemp = 1.0 / vec_am[(k - 1) * n + k - 1]; vec_am[(k - 1) * n + k - 1] = ctemp; if (kplus <= n) { #pragma omp parallel for for (int64_t j = kplus; j <= n; j++) { dcomplex cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n); vec_am[(k - 1) * n + j - 1] = -ctemp * cfun; Loading Loading @@ -1253,7 +1259,9 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) { } // i4319 loop } } // l4309 loop delete[] vi; delete[] v; return ier; } void mextc(double vk, double exri, dcomplex **fsac, double **cextlr, double **cext) { Loading