Loading src/libnptm/clu_subs.cpp +61 −73 Original line number Diff line number Diff line Loading @@ -166,21 +166,9 @@ void apcra( const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; dcomplex a11, a12, a21, a22, sum1, sum2, fc; double ****svw = new double***[le]; dcomplex ****svs = new dcomplex***[le]; for (int i = 0; i < le; i++) { svw[i] = new double**[3]; svs[i] = new dcomplex**[3]; for (int j = 0; j < 3; j++) { svw[i][j] = new double*[2]; svs[i][j] = new dcomplex*[2]; for (int k = 0; k < 2; k++) { svw[i][j][k] = new double[2](); svs[i][j][k] = new dcomplex[2](); } } } dcomplex a11, a12, a21, a22, fc; double *vec_svw = new double[le * 12](); dcomplex *vec_svs = new dcomplex[le * 12](); int nlem = le * (le + 2); for (int l28 = 1; l28 <= le; l28++) { int lpo = l28 + 1; Loading @@ -197,28 +185,28 @@ void apcra( if (inpol == 0) { double cgs = cgmpo + cgmmo; double cgd = cgmpo - cgmmo; svw[l28 - 1][ilmp - 1][0][0] = cgs; svw[l28 - 1][ilmp - 1][0][1] = cgd; svw[l28 - 1][ilmp - 1][1][0] = cgd; svw[l28 - 1][ilmp - 1][1][1] = cgs; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgs; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgs; } else { // label 22 svw[l28 - 1][ilmp - 1][0][0] = cgmpo; svw[l28 - 1][ilmp - 1][1][0] = cgmpo; svw[l28 - 1][ilmp - 1][0][1] = -cgmmo; svw[l28 - 1][ilmp - 1][1][1] = cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = -cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgmmo; } // label 26 } // ilmp loop } // l28 loop for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted for (int ilmp = 1; ilmp <= 3; ilmp++) { for (int ipa = 1; ipa <= 2; ipa++) { for (int ipamp = 1; ipamp <= 2; ipamp++) { svs[l30 - 1][ilmp - 1][ipa - 1][ipamp - 1] = cc0; } } // ipa loop } // ilmp loop } // l30 loop // for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted // for (int ilmp = 1; ilmp <= 3; ilmp++) { // for (int ipa = 1; ipa <= 2; ipa++) { // for (int ipamp = 1; ipamp <= 2; ipamp++) { // vec_svs[12 * (l30 - 1) + 4 * (ilmp - 1) + 2 * (ipa - 1) + ipamp - 1] = cc0; // } // } // ipa loop // } // ilmp loop // } // l30 loop for (int l58 = 1; l58 <= le; l58 ++) { int lpo = l58 + 1; int ltpo = l58 + lpo; Loading Loading @@ -276,19 +264,21 @@ void apcra( double z12 = zpv[ls - 1][ilsmp - 1][0][1]; double z21 = zpv[ls - 1][ilsmp - 1][1][0]; double z22 = zpv[ls - 1][ilsmp - 1][1][1]; svs[l58 - 1][ilmp - 1][0][0] += ((ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][0][1] += ((ca11 * a12 * z11 vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1)] += ( ( ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22 ) * fc ); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 1] += ((ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22) * fc); svs[l58 - 1][ilmp - 1][1][0] += ((ca12 * a11 * z11 vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 2] += ((ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][1][1] += ((ca12 * a12 * z11 vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 3] += ((ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22) * fc); Loading @@ -301,8 +291,8 @@ void apcra( } // im54 loop } // ilmp loop } // l58 loop sum1 = cc0; sum2 = cc0; dcomplex sum1 = cc0; dcomplex sum2 = cc0; for (int l68 = 1; l68 <= le; l68++) { //int lpo = l68 + 1; //int ltpo = l68 + lpo; Loading @@ -311,29 +301,29 @@ void apcra( if ((l68 == 1 && ilmp == 1) || (l68 == le && ilmp == 3)) continue; // ilmp loop if (inpol == 0) { sum1 += ( svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); sum2 += ( svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); } else { // label 62 sum1 += ( svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); sum2 += ( svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); } // label 66, ends ilmp loop } // ilmp loop Loading Loading @@ -364,20 +354,18 @@ void apcra( } // Clean memory for (int i = le - 1; i > -1; i--) { for (int j = 2; j > -1; j--) { for (int k = 1; k > -1; k--) { delete[] svw[i][j][k]; delete[] svs[i][j][k]; } delete[] svw[i][j]; delete[] svs[i][j]; } delete[] svw[i]; delete[] svs[i]; } delete[] svw; delete[] svs; // for (int i = le - 1; i > -1; i--) { // for (int j = 2; j > -1; j--) { // delete[] svw[i][j]; // delete[] svs[i][j]; // } // delete[] svw[i]; // delete[] svs[i]; // } // delete[] svw; // delete[] svs; delete[] vec_svw; delete[] vec_svs; } #ifdef USE_TARGET_OFFLOAD Loading Loading
src/libnptm/clu_subs.cpp +61 −73 Original line number Diff line number Diff line Loading @@ -166,21 +166,9 @@ void apcra( const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; dcomplex uimtl, uimtls, ca11, ca12, ca21, ca22; dcomplex a11, a12, a21, a22, sum1, sum2, fc; double ****svw = new double***[le]; dcomplex ****svs = new dcomplex***[le]; for (int i = 0; i < le; i++) { svw[i] = new double**[3]; svs[i] = new dcomplex**[3]; for (int j = 0; j < 3; j++) { svw[i][j] = new double*[2]; svs[i][j] = new dcomplex*[2]; for (int k = 0; k < 2; k++) { svw[i][j][k] = new double[2](); svs[i][j][k] = new dcomplex[2](); } } } dcomplex a11, a12, a21, a22, fc; double *vec_svw = new double[le * 12](); dcomplex *vec_svs = new dcomplex[le * 12](); int nlem = le * (le + 2); for (int l28 = 1; l28 <= le; l28++) { int lpo = l28 + 1; Loading @@ -197,28 +185,28 @@ void apcra( if (inpol == 0) { double cgs = cgmpo + cgmmo; double cgd = cgmpo - cgmmo; svw[l28 - 1][ilmp - 1][0][0] = cgs; svw[l28 - 1][ilmp - 1][0][1] = cgd; svw[l28 - 1][ilmp - 1][1][0] = cgd; svw[l28 - 1][ilmp - 1][1][1] = cgs; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgs; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgd; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgs; } else { // label 22 svw[l28 - 1][ilmp - 1][0][0] = cgmpo; svw[l28 - 1][ilmp - 1][1][0] = cgmpo; svw[l28 - 1][ilmp - 1][0][1] = -cgmmo; svw[l28 - 1][ilmp - 1][1][1] = cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1)] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 2] = cgmpo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 1] = -cgmmo; vec_svw[12 * (l28 - 1) + 4 * (ilmp - 1) + 3] = cgmmo; } // label 26 } // ilmp loop } // l28 loop for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted for (int ilmp = 1; ilmp <= 3; ilmp++) { for (int ipa = 1; ipa <= 2; ipa++) { for (int ipamp = 1; ipamp <= 2; ipamp++) { svs[l30 - 1][ilmp - 1][ipa - 1][ipamp - 1] = cc0; } } // ipa loop } // ilmp loop } // l30 loop // for (int l30 = 1; l30 <= le; l30++) { // 0-init: can be omitted // for (int ilmp = 1; ilmp <= 3; ilmp++) { // for (int ipa = 1; ipa <= 2; ipa++) { // for (int ipamp = 1; ipamp <= 2; ipamp++) { // vec_svs[12 * (l30 - 1) + 4 * (ilmp - 1) + 2 * (ipa - 1) + ipamp - 1] = cc0; // } // } // ipa loop // } // ilmp loop // } // l30 loop for (int l58 = 1; l58 <= le; l58 ++) { int lpo = l58 + 1; int ltpo = l58 + lpo; Loading Loading @@ -276,19 +264,21 @@ void apcra( double z12 = zpv[ls - 1][ilsmp - 1][0][1]; double z21 = zpv[ls - 1][ilsmp - 1][1][0]; double z22 = zpv[ls - 1][ilsmp - 1][1][1]; svs[l58 - 1][ilmp - 1][0][0] += ((ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][0][1] += ((ca11 * a12 * z11 vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1)] += ( ( ca11 * a11 * z11 + ca11 * a21 * z12 + ca21 * a11 * z21 + ca21 * a21 * z22 ) * fc ); vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 1] += ((ca11 * a12 * z11 + ca11 * a22 * z12 + ca21 * a12 * z21 + ca21 * a22 * z22) * fc); svs[l58 - 1][ilmp - 1][1][0] += ((ca12 * a11 * z11 vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 2] += ((ca12 * a11 * z11 + ca12 * a21 * z12 + ca22 * a11 * z21 + ca22 * a21 * z22) * fc); svs[l58 - 1][ilmp - 1][1][1] += ((ca12 * a12 * z11 vec_svs[12 * (l58 - 1) + 4 * (ilmp - 1) + 3] += ((ca12 * a12 * z11 + ca12 * a22 * z12 + ca22 * a12 * z21 + ca22 * a22 * z22) * fc); Loading @@ -301,8 +291,8 @@ void apcra( } // im54 loop } // ilmp loop } // l58 loop sum1 = cc0; sum2 = cc0; dcomplex sum1 = cc0; dcomplex sum2 = cc0; for (int l68 = 1; l68 <= le; l68++) { //int lpo = l68 + 1; //int ltpo = l68 + lpo; Loading @@ -311,29 +301,29 @@ void apcra( if ((l68 == 1 && ilmp == 1) || (l68 == le && ilmp == 3)) continue; // ilmp loop if (inpol == 0) { sum1 += ( svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); sum2 += ( svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); } else { // label 62 sum1 += ( svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][0] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][0] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1)] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); sum2 += ( svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][0][0] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][0][1] + svw[l68 - 1][ilmp - 1][0][1] * svs[l68 - 1][ilmp - 1][1][0] + svw[l68 - 1][ilmp - 1][1][1] * svs[l68 - 1][ilmp - 1][1][1] vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1)] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 1] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 2] + vec_svw[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] * vec_svs[12 * (l68 - 1) + 4 * (ilmp - 1) + 3] ); } // label 66, ends ilmp loop } // ilmp loop Loading Loading @@ -364,20 +354,18 @@ void apcra( } // Clean memory for (int i = le - 1; i > -1; i--) { for (int j = 2; j > -1; j--) { for (int k = 1; k > -1; k--) { delete[] svw[i][j][k]; delete[] svs[i][j][k]; } delete[] svw[i][j]; delete[] svs[i][j]; } delete[] svw[i]; delete[] svs[i]; } delete[] svw; delete[] svs; // for (int i = le - 1; i > -1; i--) { // for (int j = 2; j > -1; j--) { // delete[] svw[i][j]; // delete[] svs[i][j]; // } // delete[] svw[i]; // delete[] svs[i]; // } // delete[] svw; // delete[] svs; delete[] vec_svw; delete[] vec_svs; } #ifdef USE_TARGET_OFFLOAD Loading