Commit bc1d9774 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Optimize array indexing for SPH

parent 8eb6c69a
Loading
Loading
Loading
Loading
+197 −239

File changed.

Preview size limit exceeded, changes collapsed.

+101 −83
Original line number Diff line number Diff line
@@ -152,17 +152,18 @@ void sphere() {
		const double half_pi = acos(0.0);
		const double pi = 2.0 * half_pi;
		double gcs = 0.0;
		for (int i116 = 1; i116 <= nsph; i116++) {
			int iogi = c1->iog[i116 - 1];
			if (iogi >= i116) {
				double gcss = pi * c1->ros[i116 - 1] * c1->ros[i116 - 1];
				c1->gcsv[i116 - 1] = gcss;
				int nsh = c1->nshl[i116 - 1];
				for (int j115 = 1; j115 <= nsh; j115++) {
					c1->rc[i116 - 1][j115 - 1] = sconf->rcf[i116 - 1][j115 - 1] * c1->ros[i116 - 1];
		for (int i116 = 0; i116 < nsph; i116++) {
			int i = i116 + 1;
			int iogi = c1->iog[i116];
			if (iogi >= i) {
				double gcss = pi * c1->ros[i116] * c1->ros[i116];
				c1->gcsv[i116] = gcss;
				int nsh = c1->nshl[i116];
				for (int j115 = 0; j115 < nsh; j115++) {
					c1->rc[i116][j115] = sconf->rcf[i116][j115] * c1->ros[i116];
				}
			}
			gcs += c1->gcsv[iogi - 1];
			gcs += c1->gcsv[iogi];
		}
		double ****zpv = new double***[gconf->l_max]; //[gconf->l_max][3][2][2]; // Matrix: dim[LM x 3 x 2 x 2]
		for (int zi = 0; zi < gconf->l_max; zi++) {
@@ -170,7 +171,7 @@ void sphere() {
			for (int zj = 0; zj < 3; zj++) {
				zpv[zi][zj] = new double*[2];
				for (int zk = 0; zk < 2; zk++) {
					zpv[zi][zj][zk] = new double[2];
					zpv[zi][zj][zk] = new double[2]();
				}
			}
		}
@@ -203,9 +204,10 @@ void sphere() {
				fprintf(output, "  VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", vk);
				fprintf(output, " \n");
			}
			for (int jxi488 = 1; jxi488 <= sconf->number_of_scales; jxi488++) {
				fprintf(output, "========== JXI =%3d ====================\n", jxi488);
				double xi = sconf->scale_vec[jxi488 - 1];
			for (int jxi488 = 0; jxi488 < sconf->number_of_scales; jxi488++) {
				int jxi = jxi488 + 1;
				fprintf(output, "========== JXI =%3d ====================\n", jxi);
				double xi = sconf->scale_vec[jxi488];
				if (sconf->idfc >= 0) {
					vk = xi * wn;
					vkarg = vk;
@@ -216,24 +218,25 @@ void sphere() {
					fprintf(output, "  XI=%15.7lE\n", xi);
				}
				tppoan.write(reinterpret_cast<char *>(&vk), sizeof(double));
				for (int i132 = 1; i132 <= nsph; i132++) {
					int iogi = sconf->iog_vec[i132 - 1];
					if (iogi != i132) {
						for (int l123 = 1; l123 <= gconf->l_max; l123++) {
							c1->rmi[l123 - 1][i132 - 1] = c1->rmi[l123 - 1][iogi - 1];
							c1->rei[l123 - 1][i132 - 1] = c1->rei[l123 - 1][iogi - 1];
				for (int i132 = 0; i132 < nsph; i132++) {
					int i = i132 + 1;
					int iogi = sconf->iog_vec[i132];
					if (iogi != i) {
						for (int l123 = 0; l123 < gconf->l_max; l123++) {
							c1->rmi[l123][i132] = c1->rmi[l123][iogi - 1];
							c1->rei[l123][i132] = c1->rei[l123][iogi - 1];
						}
						continue; // i132
					}
					int nsh = sconf->nshl_vec[i132 - 1];
					int nsh = sconf->nshl_vec[i132];
					int ici = (nsh + 1) / 2;
					if (sconf->idfc == 0) {
						for (int ic = 0; ic < ici; ic++)
							c2->dc0[ic] = sconf->dc0_matrix[ic][i132 - 1][0]; // WARNING: IDFC=0 is not tested!
							c2->dc0[ic] = sconf->dc0_matrix[ic][i132][0]; // WARNING: IDFC=0 is not tested!
					} else { // IDFC != 0
						if (jxi488 == 1) {
						if (jxi == 1) {
							for (int ic = 0; ic < ici; ic++) {
								c2->dc0[ic] = sconf->dc0_matrix[ic][i132 - 1][jxi488 - 1];
								c2->dc0[ic] = sconf->dc0_matrix[ic][i132][jxi488];
							}
						}
					}
@@ -241,7 +244,7 @@ void sphere() {
					int jer = 0;
					int lcalc = 0;
					dme(
							gconf->l_max, i132, gconf->npnt, gconf->npntts, vkarg,
							gconf->l_max, i, gconf->npnt, gconf->npntts, vkarg,
							sconf->exdc, exri, c1, c2, jer, lcalc, arg
					);
					if (jer != 0) {
@@ -252,7 +255,7 @@ void sphere() {
						return;
					}
				} // i132
				if (sconf->idfc >= 0 and nsph == 1 and jxi488 == gconf->jwtm) {
				if (sconf->idfc >= 0 and nsph == 1 and jxi == gconf->jwtm) {
					// This is the condition that writes the transition matrix to output.
					int is = 1111;
					fstream ttms;
@@ -273,50 +276,51 @@ void sphere() {
					} else { // Failed to open output file. Should never happen.
						printf("ERROR: could not open TTMS file.\n");
						tppoan.close();
						fclose(output);
					}
				}
				double cs0 = 0.25 * vk * vk * vk / half_pi;
				//printf("DEBUG: cs0 = %lE\n", cs0);
				sscr0(tfsas, nsph, gconf->l_max, vk, exri, c1);
				printf("DEBUG: TFSAS = (%lE,%lE)\n", tfsas.real(), tfsas.imag());
				double sqk = vk * vk * sconf->exdc;
				aps(zpv, gconf->l_max, nsph, c1, sqk, gaps);
				rabas(gconf->in_pol, gconf->l_max, nsph, c1, tqse, tqspe, tqss, tqsps);
				for (int i170 = 1; i170 <= nsph; i170++) {
					if (c1->iog[i170 - 1] >= i170) {
						double albeds = c1->sscs[i170 - 1] / c1->sexs[i170 - 1];
						c1->sqscs[i170 - 1] *= sqsfi;
						c1->sqabs[i170 - 1] *= sqsfi;
						c1->sqexs[i170 - 1] *= sqsfi;
						fprintf(output, "     SPHERE %2d\n", i170);
						if (c1->nshl[i170 - 1] != 1) {
							fprintf(output, "  SIZE=%15.7lE\n", c2->vsz[i170 - 1]);
				for (int i170 = 0; i170 < nsph; i170++) {
					int i = i170 + 1;
					if (c1->iog[i170] >= i) {
						double albeds = c1->sscs[i170] / c1->sexs[i170];
						c1->sqscs[i170] *= sqsfi;
						c1->sqabs[i170] *= sqsfi;
						c1->sqexs[i170] *= sqsfi;
						fprintf(output, "     SPHERE %2d\n", i);
						if (c1->nshl[i170] != 1) {
							fprintf(output, "  SIZE=%15.7lE\n", c2->vsz[i170]);
						} else {
							fprintf(
									output,
									"  SIZE=%15.7lE, REFRACTIVE INDEX=(%15.7lE,%15.7lE)\n",
									c2->vsz[i170 -1],
									c2->vkt[i170 - 1].real(),
									c2->vkt[i170 - 1].imag()
									c2->vsz[i170],
									c2->vkt[i170].real(),
									c2->vkt[i170].imag()
							);
						}
						fprintf(output, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n");
						fprintf(
								output,
								" %14.7lE%15.7lE%15.7lE%15.7lE\n",
								c1->sscs[i170 - 1], c1->sabs[i170 - 1],
								c1->sexs[i170 - 1], albeds
								c1->sscs[i170], c1->sabs[i170],
								c1->sexs[i170], albeds
						);
						fprintf(output, " ---- SCS/GS -- ABS/GS -- EXS/GS ---\n");
						fprintf(
								output,
								" %14.7lE%15.7lE%15.7lE\n",
								c1->sqscs[i170 - 1], c1->sqabs[i170 - 1],
								c1->sqexs[i170 - 1]
								c1->sqscs[i170], c1->sqabs[i170],
								c1->sqexs[i170]
						);
						fprintf(output, "  FSAS=%15.7lE%15.7lE\n", c1->fsas[i170 - 1].real(), c1->fsas[i170 - 1].imag());
						double csch = 2.0 * vk * sqsfi / c1->gcsv[i170 -1];
						s0 = c1->fsas[i170 - 1] * exri;
						fprintf(output, "  FSAS=%15.7lE%15.7lE\n", c1->fsas[i170].real(), c1->fsas[i170].imag());
						double csch = 2.0 * vk * sqsfi / c1->gcsv[i170];
						s0 = c1->fsas[i170] * exri;
						double qschu = csch * s0.imag();
						double pschu = csch * s0.real();
						double s0mag = cs0 * abs(s0);
@@ -325,32 +329,32 @@ void sphere() {
								"  QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n",
								qschu, pschu, s0mag
						);
						double rapr = c1->sexs[i170 - 1] - gaps[i170 - 1];
						double cosav = gaps[i170 - 1] / c1->sscs[i170 - 1];
						double rapr = c1->sexs[i170] - gaps[i170];
						double cosav = gaps[i170] / c1->sscs[i170];
						fprintf(output, "  COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr);
						fprintf(output, "  IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 1, tqse[0][i170 - 1], tqss[0][i170 - 1]);
						fprintf(output, "  IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 2, tqse[1][i170 - 1], tqss[1][i170 - 1]);
						tppoan.write(reinterpret_cast<char *>(&(tqse[0][i170 - 1])), sizeof(double));
						tppoan.write(reinterpret_cast<char *>(&(tqss[0][i170 - 1])), sizeof(double));
						double val = tqspe[0][i170 - 1].real();
						fprintf(output, "  IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 1, tqse[0][i170], tqss[0][i170]);
						fprintf(output, "  IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 2, tqse[1][i170], tqss[1][i170]);
						tppoan.write(reinterpret_cast<char *>(&(tqse[0][i170])), sizeof(double));
						tppoan.write(reinterpret_cast<char *>(&(tqss[0][i170])), sizeof(double));
						double val = tqspe[0][i170].real();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
						val = tqspe[0][i170 - 1].imag();
						val = tqspe[0][i170].imag();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
						val = tqsps[0][i170 - 1].real();
						val = tqsps[0][i170].real();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
						val = tqsps[0][i170 - 1].imag();
						val = tqsps[0][i170].imag();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
						tppoan.write(reinterpret_cast<char *>(&(tqse[1][i170 - 1])), sizeof(double));
						tppoan.write(reinterpret_cast<char *>(&(tqss[1][i170 - 1])), sizeof(double));
						val = tqspe[1][i170 - 1].real();
						tppoan.write(reinterpret_cast<char *>(&(tqse[1][i170])), sizeof(double));
						tppoan.write(reinterpret_cast<char *>(&(tqss[1][i170])), sizeof(double));
						val = tqspe[1][i170].real();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
						val = tqspe[1][i170 - 1].imag();
						val = tqspe[1][i170].imag();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
						val = tqsps[1][i170 - 1].real();
						val = tqsps[1][i170].real();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
						val = tqsps[1][i170 - 1].imag();
						val = tqsps[1][i170].imag();
						tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
					} // End if iog[i170 - 1] >= i170
					} // End if iog[i170] >= i
				} // i170 loop
				if (nsph != 1) {
					fprintf(output, "  FSAT=(%15.7lE,%15.7lE)\n", tfsas.real(), tfsas.imag());
@@ -366,10 +370,12 @@ void sphere() {
					);
				}
				th = th1;
				for (int jth486 = 1; jth486 <= nth; jth486++) { // OpenMP parallelizable section
				for (int jth486 = 0; jth486 < nth; jth486++) { // OpenMP parallelizable section
					int jth = jth486 + 1;
					ph = ph1;
					for (int jph484 = 1; jph484 <= nph; jph484++) {
						bool goto182 = (nk == 1) && (jxi488 > 1);
					for (int jph484 = 0; jph484 < nph; jph484++) {
						int jph = jph484 + 1;
						bool goto182 = (nk == 1) && (jxi > 1);
						if (!goto182) {
							upvmp(th, ph, 0, cost, sint, cosp, sinp, u, upmp, unmp);
						}
@@ -385,7 +391,8 @@ void sphere() {
						}
						double thsl = ths1;
						double phsph = 0.0;
						for (int jths482 = 1; jths482 <= nths; jths482++) {
						for (int jths482 = 0; jths482 < nths; jths482++) {
							int jths = jths482 + 1;
							double ths = thsl;
							int icspnv = 0;
							if (gconf->meridional_type > 1) ths = th + thsca;
@@ -397,19 +404,20 @@ void sphere() {
								if (phsph != 0.0) icspnv = 1;
							}
							double phs = phs1;
							for (int jphs480 = 1; jphs480 <= nphs; jphs480++) {
							for (int jphs480 = 0; jphs480 < nphs; jphs480++) {
								int jphs = jphs480 + 1;
								if (gconf->meridional_type >= 1) {
									phs = ph + phsph;
									if (phs >= 360.0) phs -= 360.0;
								}
								bool goto190 = (nks == 1) && ((jxi488 > 1) || (jth486 > 1) || (jph484 > 1));
								bool goto190 = (nks == 1) && ((jxi > 1) || (jth > 1) || (jph > 1));
								if (!goto190) {
									upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, us, upsmp, unsmp);
									if (gconf->meridional_type >= 0) {
										wmamp(2, costs, sints, cosps, sinps, gconf->in_pol, gconf->l_max, 0, nsph, args, us, upsmp, unsmp, c1);
									}
								}
								if (nkks != 0 || jxi488 == 1) {
								if (nkks != 0 || jxi == 1) {
									upvsp(u, upmp, unmp, us, upsmp, unsmp, up, un, ups, uns, duk, isq, ibf, scan, cfmp, sfmp, cfsp, sfsp);
									if (gconf->meridional_type < 0) {
										wmasp(
@@ -418,9 +426,9 @@ void sphere() {
												gconf->l_max, 0, nsph, argi, args, c1
										);
									}
									for (int i193 = 1; i193 <= 3; i193++) {
										un[i193 - 1] = unmp[i193 - 1];
										uns[i193 - 1] = unsmp[i193 - 1];
									for (int i193 = 0; i193 < 3; i193++) {
										un[i193] = unmp[i193];
										uns[i193] = unsmp[i193];
									}
								}
								if (gconf->meridional_type < 0) jw = 1;
@@ -438,7 +446,7 @@ void sphere() {
								fprintf(
										output,
										"********** JTH =%3d, JPH =%3d, JTHS =%3d, JPHS =%3d ********************\n",
										jth486, jph484, jths482, jphs480
										jth, jph, jths, jphs
								);
								fprintf(
										output,
@@ -455,25 +463,25 @@ void sphere() {
									fprintf(output, "  UN=(%12.5lE,%12.5lE,%12.5lE)\n", un[0], un[1], un[2]);
								}
								sscr2(nsph, gconf->l_max, vk, exri, c1);
								for (int ns226 = 1; ns226 <= nsph; ns226++) {
									fprintf(output, "     SPHERE %2d\n", ns226);
								for (int ns226 = 0; ns226 < nsph; ns226++) {
									int ns = ns226 + 1;
									fprintf(output, "     SPHERE %2d\n", ns);
									fprintf(
											output, "  SAS(1,1)=%15.7lE%15.7lE, SAS(2,1)=%15.7lE%15.7lE\n",
											c1->sas[ns226 - 1][0][0].real(), c1->sas[ns226 - 1][0][0].imag(),
											c1->sas[ns226 - 1][1][0].real(), c1->sas[ns226 - 1][1][0].imag()
											c1->sas[ns226][0][0].real(), c1->sas[ns226][0][0].imag(),
											c1->sas[ns226][1][0].real(), c1->sas[ns226][1][0].imag()
									);
									fprintf(
											output, "  SAS(1,2)=%15.7lE%15.7lE, SAS(2,2)=%15.7lE%15.7lE\n",
											c1->sas[ns226 - 1][0][1].real(), c1->sas[ns226 - 1][0][1].imag(),
											c1->sas[ns226 - 1][1][1].real(), c1->sas[ns226 - 1][1][1].imag()
											c1->sas[ns226][0][1].real(), c1->sas[ns226][0][1].imag(),
											c1->sas[ns226][1][1].real(), c1->sas[ns226][1][1].imag()
									);
									if (jths482 == 1 && jphs480 == 1)
									if (jths == 1 && jphs == 1)
										fprintf(
												output, "  Fx=%15.7lE, Fy=%15.7lE, Fz=%15.7lE\n",
												frx, fry, frz
										);
									for (int i225 = 0; i225 < 16; i225++)
										vint[i225] = c1->vints[ns226 - 1][i225];
									for (int i225 = 0; i225 < 16; i225++) vint[i225] = c1->vints[ns226][i225];
									mmulc(vint, cmullr, cmul);
									fprintf(output, "  MULS\n        ");
									for (int imul = 0; imul < 4; imul++) {
@@ -519,6 +527,16 @@ void sphere() {
		fclose(output);
		delete c1;
		delete c2;
		for (int zi = gconf->l_max - 1; zi > -1; zi--) {
			for (int zj = 0; zj < 3; zj++) {
				for (int zk = 0; zk < 2; zk++) {
					delete[] zpv[zi][zj][zk];
				}
				delete[] zpv[zi][zj];
			}
			delete[] zpv[zi];
		}
		delete[] zpv;
		delete[] duk;
		delete[] u;
		delete[] us;
@@ -534,7 +552,7 @@ void sphere() {
		delete[] argi;
		delete[] args;
		delete[] gaps;
		for (int i = 0; i < 4; i++) {
		for (int i = 3; i > -1; i--) {
			delete[] cmul[i];
			delete[] cmullr[i];
		}