Commit 98c83a29 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Use vectorized inner matrices in apc()

parent a71d4380
Loading
Loading
Loading
Loading
+36 −43
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ void apc(
  double ****zpv, int le, dcomplex **am0m, dcomplex **w,
  double sqk, double **gapr, dcomplex **gapp
) {
  dcomplex **ac, **gap;
  dcomplex *vec_ac, *vec_gap;
  const dcomplex cc0 = 0.0 + 0.0 * I;
  const dcomplex uim = 0.0 + 1.0 * I;
  dcomplex uimmp, summ, sume, suem, suee, summp, sumep;
@@ -62,24 +62,19 @@ void apc(
  double cimu = cof / sqrt(2.0);
  int nlem = le * (le + 2);
  const int nlemt = nlem + nlem;
  ac = new dcomplex*[nlemt];
  gap = new dcomplex*[3];
  for (int ai = 0; ai < nlemt; ai++) ac[ai] = new dcomplex[2]();
  for (int gi = 0; gi < 3; gi++) gap[gi] = new dcomplex[2]();
  const int nlemf = 2 * nlemt;
  vec_ac = new dcomplex[nlemf]();
  vec_gap = new dcomplex[6]();
  for (int j45 = 1; j45 <= nlemt; j45++) {
    int j = j45 - 1;
    ac[j][0] = cc0;
    ac[j][1] = cc0;
    for (int i45 = 1; i45 <= nlemt; i45++) {
      int i = i45 - 1;
      ac[j][0] += (am0m[j][i] * w[i][0]);
      ac[j][1] += (am0m[j][i] * w[i][1]);
      vec_ac[2 * j] += (am0m[j][i] * w[i][0]);
      vec_ac[2 * j + 1] += (am0m[j][i] * w[i][1]);
    } //i45 loop
  } //j45 loop
  for (int imu90 = 1; imu90 <=3; imu90++) {
    int mu = imu90 - 2;
    gap[imu90 - 1][0] = cc0;
    gap[imu90 - 1][1] = cc0;
    gapp[imu90 - 1][0] = cc0;
    gapp[imu90 - 1][1] = cc0;
    for (int l80 =1; l80 <= le; l80++) {
@@ -105,14 +100,14 @@ void apc(
	    int jpo = 2;
	    for (int ipo = 1; ipo <= 2; ipo++) {
	      if (ipo == 2) jpo = 1;
	      summ = dconjg(ac[i - 1][ipo - 1]) * ac[imp - 1][ipo - 1];
	      sume = dconjg(ac[i - 1][ipo - 1]) * ac[impe - 1][ipo - 1];
	      suem = dconjg(ac[ie - 1][ipo - 1]) * ac[imp - 1][ipo - 1];
	      suee = dconjg(ac[ie - 1][ipo - 1]) * ac[impe - 1][ipo - 1];
	      summp = dconjg(ac[i - 1][jpo - 1]) * ac[imp - 1][ipo - 1];
	      sumep = dconjg(ac[i - 1][jpo - 1]) * ac[impe - 1][ipo - 1];
	      suemp = dconjg(ac[ie - 1][jpo - 1]) * ac[imp - 1][ipo - 1];
	      sueep = dconjg(ac[ie - 1][jpo - 1]) * ac[impe - 1][ipo - 1];
	      summ = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      sume = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1];
	      suem = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      suee = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1];
	      summp = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      sumep = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1];
	      suemp = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      sueep = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) +ipo - 1];
	      if (lmpml != 0) {
		summ *= uimmp;
		sume *= uimmp;
@@ -124,7 +119,7 @@ void apc(
		sueep *= uimmp;
	      }
	      // label 55
	      gap[imu90 - 1][ipo - 1] += (
	      vec_gap[2 * (imu90 - 1) + ipo - 1] += (
		(
		  summ * zpv[l80 - 1][ilmp - 1][0][0]
		  + sume * zpv[l80 - 1][ilmp - 1][0][1]
@@ -147,9 +142,9 @@ void apc(
    } // l80 loop
  } // imu90 loop
  for (int ipo95 = 1; ipo95 <= 2; ipo95++) {
    sume = gap[0][ipo95 - 1] * cimu;
    suee = gap[1][ipo95 - 1] * cof;
    suem = gap[2][ipo95 - 1] * cimu;
    sume = vec_gap[ipo95 - 1] * cimu;
    suee = vec_gap[2 + ipo95 - 1] * cof;
    suem = vec_gap[4 + ipo95 - 1] * cimu;
    gapr[0][ipo95 - 1] = real(sume - suem);
    gapr[1][ipo95 - 1] = real((sume + suem) * uim);
    gapr[2][ipo95 - 1] = real(suee);
@@ -161,10 +156,8 @@ void apc(
    gapp[2][ipo95 - 1] = sueep;
  } // ipo95 loop
  // Clean memory
  for (int ai = nlemt - 1; ai > -1; ai--) delete[] ac[ai];
  for (int gi = 2; gi > -1; gi--) delete[] gap[gi];
  delete[] ac;
  delete[] gap;
  delete[] vec_ac;
  delete[] vec_gap;
}

void apcra(