Commit 760a1b48 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Use omp parallel for loops for embrrassingly parallel sections of LUCIN

parent 77e57978
Loading
Loading
Loading
Loading
+23 −17
Original line number Diff line number Diff line
@@ -1152,21 +1152,23 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
   * IER     IS REPLACED BY 1 FOR SINGULARITY.
   */
  double *v = new double[nddmst];
  double *vi = new double[nddmst]();
  dcomplex *vec_am = am[0];
  dcomplex ctemp, cfun;
  const dcomplex cc0 = 0.0 + 0.0 * I;
  ier = 0;
  int nminus = n - 1;
#pragma omp parallel for
  for (int64_t i = 1; i <= n; i++) {
    double sum = 0.0;
#pragma omp parallel for reduction(+: vi[i - 1])
    for (int64_t j = 1; j <= n; j++) {
      sum += (
      vi[i - 1] += (
	real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1])
	+ imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1])
      );
    } // j1319 loop
    v[i - 1] = 1.0 / sum;
    v[i - 1] = 1.0 / vi[i - 1];
  } // i1309 loop
  delete[] vi;
  // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U.
  //    REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4.
  //    (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS
@@ -1177,8 +1179,8 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
    int64_t l = k;
    double psqmax = 0.0;
    for (int64_t i = k; i <= n; i++) {
      cfun = cdtp(-vec_am[(i - 1) * n + k - 1], vec_am, i, 1, k, kminus, n);
      ctemp = -cfun;
      dcomplex cfun = cdtp(-vec_am[(i - 1) * n + k - 1], vec_am, i, 1, k, kminus, n);
      dcomplex ctemp = -cfun;
      vec_am[(i - 1) * n + k - 1] = ctemp;
      double psq = v[i - 1] * (real(ctemp) * real(ctemp) + imag(ctemp) * imag(ctemp));
      if (psq > psqmax) {
@@ -1187,8 +1189,10 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
      }
    } // i2029 loop
    if (l != k) {
      // Row swapping can be made parallel
#pragma omp parallel for
      for (int64_t j = 1; j <= n; j++) {
	ctemp = vec_am[(k - 1) * n + j - 1];
	dcomplex ctemp = vec_am[(k - 1) * n + j - 1];
	vec_am[(k - 1) * n + j - 1] = vec_am[(l - 1) * n + j - 1];
	vec_am[(l - 1) * n + j - 1] = ctemp;
      } // j2049 loop
@@ -1201,11 +1205,12 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
      delete[] v;
      return;
    }
    ctemp = 1.0 / vec_am[(k - 1) * n + k - 1];
    am[k - 1][k - 1] = ctemp;
    dcomplex ctemp = 1.0 / vec_am[(k - 1) * n + k - 1];
    vec_am[(k - 1) * n + k - 1] = ctemp;
    if (kplus <= n) {
#pragma omp parallel for
      for (int64_t j = kplus; j <= n; j++) {
	cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n);
	dcomplex cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n);
	vec_am[(k - 1) * n + j - 1] = -ctemp * cfun;
      } // j2059 loop
    }
@@ -1215,7 +1220,7 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
  for (int64_t k = 1; k <= nminus; k++) {
    int64_t kplus = k + 1;
    for (int64_t i = kplus; i <= n; i++) {
      cfun = cdtp(cc0, vec_am, i, k, k, i - k, n);
      dcomplex cfun = cdtp(cc0, vec_am, i, k, k, i - k, n);
      vec_am[(i - 1) * n + k - 1] = -vec_am[(i - 1) * n + i - 1] * cfun;
      cfun = cdtp(vec_am[(k - 1) * n + i - 1], vec_am, k, kplus, i, i - k - 1, n);
      vec_am[(k - 1) * n + i - 1] = -cfun;
@@ -1225,11 +1230,11 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
  for (int64_t k = 1; k <= n; k++) {
    for (int64_t i = 1; i <= n; i++) {
      if (i < k) {
	cfun = cdtp(cc0, vec_am, i, k, k, n - k + 1, n);
	dcomplex cfun = cdtp(cc0, vec_am, i, k, k, n - k + 1, n);
	vec_am[(i - 1) * n + k -1] = cfun;
      }
      else {
	cfun = cdtp(vec_am[(i - 1) * n + k - 1], vec_am, i, i + 1, k, n - i, n);
	dcomplex cfun = cdtp(vec_am[(i - 1) * n + k - 1], vec_am, i, i + 1, k, n - i, n);
	vec_am[(i - 1) * n + k - 1] = cfun;
      }
    } // i4119 loop
@@ -1240,8 +1245,9 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
    int64_t k = n - l + 1;
    int64_t kcol = (int64_t)(v[k - 1]);
    if (kcol != k) {
#pragma omp parallel for
      for (int64_t i = 1; i <= n; i++) {
	ctemp = vec_am[(i - 1) * n + k - 1];
	dcomplex ctemp = vec_am[(i - 1) * n + k - 1];
	vec_am[(i - 1) * n + k - 1] = vec_am[(i - 1) * n + kcol - 1];
	vec_am[(i - 1) * n + kcol - 1] = ctemp;
      } // i4319 loop