Commit 77e57978 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Use vectorized version of AM in LUCIN

parent f0d4acb7
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -79,9 +79,9 @@ void apcra(
 * \param jf: `int`
 * \param k: `int`
 * \param nj: `int`
 * \param nddmst: `np_int` Maximum size of the matrix.
 * \param istep: `np_int` Size of rows in the matrix.
 */
dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int nddmst);
dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int istep);

/*! \brief C++ porting of CGEV. QUESTION: description?
 *
+1 −1
Original line number Diff line number Diff line
@@ -86,6 +86,6 @@ void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, doub
  zinvert(mat, size, ier);
#endif
#else
  lucin(mat, max_size, size, ier);
  lucin(mat, size, size, ier);
#endif
}
+45 −49
Original line number Diff line number Diff line
@@ -388,16 +388,12 @@ void apcra(
  delete[] svs;
}

dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int nddmst) {
  /* NOTE: the original FORTRAN code treats the AM matrix as a
   * vector. This is not directly allowed in C++ and it requires
   * accounting for the different dimensions.
   */
dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int istep) {
  dcomplex result = z;
  if (nj > 0) {
    int jl = jf + nj - 1;
    for (int j = jf; j <= jl; j++) {
      result += (vec_am[nddmst * (i - 1) + j - 1] * vec_am[nddmst * (j - 1) + k - 1]);
      result += (vec_am[(i - 1) * istep + j - 1] * vec_am[(j - 1) * istep + k - 1]);
    }
  }
  return result;
@@ -1161,93 +1157,93 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier) {
  const dcomplex cc0 = 0.0 + 0.0 * I;
  ier = 0;
  int nminus = n - 1;
  for (int64_t i = 0; i < n; i++) {
  for (int64_t i = 1; i <= n; i++) {
    double sum = 0.0;
    for (int64_t j = 0; j < n; j++) {
    for (int64_t j = 1; j <= n; j++) {
      sum += (
	      real(vec_am[n * i + j]) * real(vec_am[n * i + j])
	      + imag(vec_am[n * i + j]) * imag(vec_am[n * i + j])
	      real(vec_am[(i - 1) * n + j - 1]) * real(vec_am[(i - 1) * n + j - 1])
	      + imag(vec_am[(i - 1) * n + j - 1]) * imag(vec_am[(i - 1) * n + j - 1])
	      );
    } // j1319 loop
    v[i] = 1.0 / sum;
    v[i - 1] = 1.0 / sum;
  } // i1309 loop
  // 2. REPLACE AM BY TRIANGULAR MATRICES (L,U) WHERE AM=L*U.
  //    REPLACE L(I,I) BY 1/L(I,I), READY FOR SECTION 4.
  //    (ROW INTERCHANGES TAKE PLACE, AND THE INDICES OF THE PIVOTAL ROWS
  //    ARE PLACED IN V.)
  for (int64_t k = 0; k < n; k++) {
    int64_t kplus = k + 2;
    int64_t kminus = k;
  for (int64_t k = 1; k <= n; k++) {
    int64_t kplus = k + 1;
    int64_t kminus = k - 1;
    int64_t l = k;
    double psqmax = 0.0;
    for (int64_t i = k; i < n; i++) {
      cfun = cdtp(-vec_am[n * i + k], vec_am, i + 1, 1, k + 1, kminus, n);
    for (int64_t i = k; i <= n; i++) {
      cfun = cdtp(-vec_am[(i - 1) * n + k - 1], vec_am, i, 1, k, kminus, n);
      ctemp = -cfun;
      vec_am[n * i + k] = ctemp;
      double psq = v[i] * (real(ctemp) * real(ctemp) + imag(ctemp) * imag(ctemp));
      vec_am[(i - 1) * n + k - 1] = ctemp;
      double psq = v[i - 1] * (real(ctemp) * real(ctemp) + imag(ctemp) * imag(ctemp));
      if (psq > psqmax) {
	psqmax = psq;
	l = i;
      }
    } // i2029 loop
    if (l != k) {
      for (int64_t j = 0; j < n; j++) {
	ctemp = vec_am[n * k + j];
	vec_am[n * k + j] = vec_am[n * l + j];
	vec_am[n * l + j] = ctemp;
      for (int64_t j = 1; j <= n; j++) {
	ctemp = vec_am[(k - 1) * n + j - 1];
	vec_am[(k - 1) * n + j - 1] = vec_am[(l - 1) * n + j - 1];
	vec_am[(l - 1) * n + j - 1] = ctemp;
      } // j2049 loop
      v[l] = v[k];
      v[l - 1] = v[k - 1];
    }
    // label 2011
    v[k] = 1.0 * l;
    v[k - 1] = 1.0 * l;
    if (psqmax == 0.0) {
      ier = 1;
      delete[] v;
      return;
    }
    ctemp = 1.0 / vec_am[n * k + k];
    vec_am[n * k + k] = ctemp;
    ctemp = 1.0 / vec_am[(k - 1) * n + k - 1];
    am[k - 1][k - 1] = ctemp;
    if (kplus <= n) {
      for (int64_t j = k + 1; j < n; j++) {
	cfun = cdtp(-vec_am[n * k + j], vec_am, k + 1, 1, j + 1, kminus, n);
	vec_am[n * k + j] = -ctemp * cfun;
      for (int64_t j = kplus; j <= n; j++) {
	cfun = cdtp(-vec_am[(k - 1) * n + j - 1], vec_am, k, 1, j, kminus, n);
	vec_am[(k - 1) * n + j - 1] = -ctemp * cfun;
      } // j2059 loop
    }
  } // k2019 loop
  // 4.  REPLACE AM BY ITS INVERSE AMINV.
  // 4.1 REPLACE L AND U BY THEIR INVERSE LINV AND UINV.
  for (int64_t k = 0; k < nminus; k++) {
  for (int64_t k = 1; k <= nminus; k++) {
    int64_t kplus = k + 1;
    for (int64_t i = kplus; i < n; i++) {
      cfun = cdtp(cc0, vec_am, i + 1, k + 1, k + 1, i - k, n);
      vec_am[n * i + k] = -vec_am[n * i + i] * cfun;
      cfun = cdtp(vec_am[n * k + i], vec_am, k + 1, kplus + 1, i + 1, i - k - 1, n);
      vec_am[n * k + i] = -cfun;
    for (int64_t i = kplus; i <= n; i++) {
      cfun = cdtp(cc0, vec_am, i, k, k, i - k, n);
      vec_am[(i - 1) * n + k - 1] = -vec_am[(i - 1) * n + i - 1] * cfun;
      cfun = cdtp(vec_am[(k - 1) * n + i - 1], vec_am, k, kplus, i, i - k - 1, n);
      vec_am[(k - 1) * n + i - 1] = -cfun;
    } // i4119 loop
  } // k4109 loop
  // 4.2 FORM AMINV=UINV*LINV.
  for (int64_t k = 0; k < n; k++) {
    for (int64_t i = 0; i < n; i++) {
  for (int64_t k = 1; k <= n; k++) {
    for (int64_t i = 1; i <= n; i++) {
      if (i < k) {
	cfun = cdtp(cc0, vec_am, i + 1, k + 1, k + 1, n - k, n);
	vec_am[n * i + k] = cfun;
	cfun = cdtp(cc0, vec_am, i, k, k, n - k + 1, n);
	vec_am[(i - 1) * n + k -1] = cfun;
      }
      else {
	cfun = cdtp(vec_am[n * i + k], vec_am, i + 1, i + 2, k + 1, n - i - 1, n);
	vec_am[n * i + k] = cfun;
	cfun = cdtp(vec_am[(i - 1) * n + k - 1], vec_am, i, i + 1, k, n - i, n);
	vec_am[(i - 1) * n + k - 1] = cfun;
      }
    } // i4119 loop
  } // k4209 loop
  // 4.3 INTERCHANGE COLUMNS OF AMINV AS SPECIFIED BY V, BUT IN REVERSE
  //     ORDER.
  for (int64_t l = 0; l < n; l++) {
    int64_t k = n - 1 - l;
    int64_t kcol = (int64_t)(v[k]);
  for (int64_t l = 1; l <= n; l++) {
    int64_t k = n - l + 1;
    int64_t kcol = (int64_t)(v[k - 1]);
    if (kcol != k) {
      for (int64_t i = 0; i < n; i++) {
	ctemp = vec_am[n * i + k];
	vec_am[n * i + k] = vec_am[n * i + kcol];
	vec_am[n * i + kcol] = ctemp;
      for (int64_t i = 1; i <= n; i++) {
	ctemp = vec_am[(i - 1) * n + k - 1];
	vec_am[(i - 1) * n + k - 1] = vec_am[(i - 1) * n + kcol - 1];
	vec_am[(i - 1) * n + kcol - 1] = ctemp;
      } // i4319 loop
    }
  } // l4309 loop