Commit 3336099a authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Use OpenMP to parallelize apc() private vector initializations

parent 98c83a29
Loading
Loading
Loading
Loading
+34 −35
Original line number Diff line number Diff line
@@ -65,14 +65,13 @@ void apc(
  const int nlemf = 2 * nlemt;
  vec_ac = new dcomplex[nlemf]();
  vec_gap = new dcomplex[6]();
  for (int j45 = 1; j45 <= nlemt; j45++) {
    int j = j45 - 1;
    for (int i45 = 1; i45 <= nlemt; i45++) {
      int i = i45 - 1;
      vec_ac[2 * j] += (am0m[j][i] * w[i][0]);
      vec_ac[2 * j + 1] += (am0m[j][i] * w[i][1]);
    } //i45 loop
  } //j45 loop
#pragma omp parallel for reduction(+: vec_ac[0:nlemf])
  for (np_int ij = 0; ij < nlemt * nlemt; ij++) {
    np_int j45 = ij / nlemt;
    np_int i45 = ij % nlemt;
    vec_ac[2 * j45] += (am0m[j45][i45] * w[i45][0]);
    vec_ac[2 * j45 + 1] += (am0m[j45][i45] * w[i45][1]);
  } // ij loop
  for (int imu90 = 1; imu90 <=3; imu90++) {
    int mu = imu90 - 2;
    gapp[imu90 - 1][0] = cc0;
@@ -97,17 +96,17 @@ void apc(
	    int imp = impmmmp + mmp;
	    int impe = imp + nlem;
	    double cgc = cg1(lmpml, mu, l80, m);
	    int jpo = 2;
	    for (int ipo = 1; ipo <= 2; ipo++) {
	      if (ipo == 2) jpo = 1;
	      summ = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      sume = dconjg(vec_ac[2 * (i - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1];
	      suem = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      suee = dconjg(vec_ac[2 * (ie - 1) + ipo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1];
	      summp = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      sumep = dconjg(vec_ac[2 * (i - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) + ipo - 1];
	      suemp = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (imp - 1) + ipo - 1];
	      sueep = dconjg(vec_ac[2 * (ie - 1) + jpo - 1]) * vec_ac[2 * (impe - 1) +ipo - 1];
	    int jpo = 1;
	    for (int ipo = 0; ipo < 2; ipo++) {
	      if (ipo == 1) jpo = 0;
	      summ = dconjg(vec_ac[2 * (i - 1) + ipo]) * vec_ac[2 * (imp - 1) + ipo];
	      sume = dconjg(vec_ac[2 * (i - 1) + ipo]) * vec_ac[2 * (impe - 1) + ipo];
	      suem = dconjg(vec_ac[2 * (ie - 1) + ipo]) * vec_ac[2 * (imp - 1) + ipo];
	      suee = dconjg(vec_ac[2 * (ie - 1) + ipo]) * vec_ac[2 * (impe - 1) + ipo];
	      summp = dconjg(vec_ac[2 * (i - 1) + jpo]) * vec_ac[2 * (imp - 1) + ipo];
	      sumep = dconjg(vec_ac[2 * (i - 1) + jpo]) * vec_ac[2 * (impe - 1) + ipo];
	      suemp = dconjg(vec_ac[2 * (ie - 1) + jpo]) * vec_ac[2 * (imp - 1) + ipo];
	      sueep = dconjg(vec_ac[2 * (ie - 1) + jpo]) * vec_ac[2 * (impe - 1) +ipo];
	      if (lmpml != 0) {
		summ *= uimmp;
		sume *= uimmp;
@@ -119,7 +118,7 @@ void apc(
		sueep *= uimmp;
	      }
	      // label 55
	      vec_gap[2 * (imu90 - 1) + ipo - 1] += (
	      vec_gap[2 * (imu90 - 1) + ipo] += (
		(
		  summ * zpv[l80 - 1][ilmp - 1][0][0]
		  + sume * zpv[l80 - 1][ilmp - 1][0][1]
@@ -127,7 +126,7 @@ void apc(
		  + suee * zpv[l80 - 1][ilmp - 1][1][1]
		) * cgc
	      );
	      gapp[imu90 - 1][ipo - 1] += (
	      gapp[imu90 - 1][ipo] += (
		(
		  summp * zpv[l80 - 1][ilmp - 1][0][0]
		  + sumep * zpv[l80 - 1][ilmp - 1][0][1]
@@ -141,19 +140,19 @@ void apc(
      } // ilmp loop
    } // l80 loop
  } // imu90 loop
  for (int ipo95 = 1; ipo95 <= 2; ipo95++) {
    sume = vec_gap[ipo95 - 1] * cimu;
    suee = vec_gap[2 + ipo95 - 1] * cof;
    suem = vec_gap[4 + ipo95 - 1] * cimu;
    gapr[0][ipo95 - 1] = real(sume - suem);
    gapr[1][ipo95 - 1] = real((sume + suem) * uim);
    gapr[2][ipo95 - 1] = real(suee);
    sumep = gapp[0][ipo95 - 1] * cimu;
    sueep = gapp[1][ipo95 - 1] * cof;
    suemp = gapp[2][ipo95 - 1] * cimu;
    gapp[0][ipo95 - 1] = sumep - suemp;
    gapp[1][ipo95 - 1] = (sumep + suemp) * uim;
    gapp[2][ipo95 - 1] = sueep;
  for (int ipo95 = 0; ipo95 < 2; ipo95++) {
    sume = vec_gap[ipo95] * cimu;
    suee = vec_gap[2 + ipo95] * cof;
    suem = vec_gap[4 + ipo95] * cimu;
    gapr[0][ipo95] = real(sume - suem);
    gapr[1][ipo95] = real((sume + suem) * uim);
    gapr[2][ipo95] = real(suee);
    sumep = gapp[0][ipo95] * cimu;
    sueep = gapp[1][ipo95] * cof;
    suemp = gapp[2][ipo95] * cimu;
    gapp[0][ipo95] = sumep - suemp;
    gapp[1][ipo95] = (sumep + suemp) * uim;
    gapp[2][ipo95] = sueep;
  } // ipo95 loop
  // Clean memory
  delete[] vec_ac;