Optimize array indexing for SPH (bc1d9774) · Commits · Giacomo Mulas / NP_TMcode

src/include/sph_subs.h

+197 −239

File changed.

Preview size limit exceeded, changes collapsed.

src/sphere/sphere.cpp

+101 −83

Original line number	Diff line number	Diff line
		@@ -152,17 +152,18 @@ void sphere() {
		const double half_pi = acos(0.0);
		const double pi = 2.0 * half_pi;
		double gcs = 0.0;
		for (int i116 = 1; i116 <= nsph; i116++) {
		int iogi = c1->iog[i116 - 1];
		if (iogi >= i116) {
		double gcss = pi * c1->ros[i116 - 1] * c1->ros[i116 - 1];
		c1->gcsv[i116 - 1] = gcss;
		int nsh = c1->nshl[i116 - 1];
		for (int j115 = 1; j115 <= nsh; j115++) {
		c1->rc[i116 - 1][j115 - 1] = sconf->rcf[i116 - 1][j115 - 1] * c1->ros[i116 - 1];
		for (int i116 = 0; i116 < nsph; i116++) {
		int i = i116 + 1;
		int iogi = c1->iog[i116];
		if (iogi >= i) {
		double gcss = pi * c1->ros[i116] * c1->ros[i116];
		c1->gcsv[i116] = gcss;
		int nsh = c1->nshl[i116];
		for (int j115 = 0; j115 < nsh; j115++) {
		c1->rc[i116][j115] = sconf->rcf[i116][j115] * c1->ros[i116];
		}
		}
		gcs += c1->gcsv[iogi - 1];
		gcs += c1->gcsv[iogi];
		}
		double **zpv = new double*[gconf->l_max]; //[gconf->l_max][3][2][2]; // Matrix: dim[LM x 3 x 2 x 2]
		for (int zi = 0; zi < gconf->l_max; zi++) {
		@@ -170,7 +171,7 @@ void sphere() {
		for (int zj = 0; zj < 3; zj++) {
		zpv[zi][zj] = new double*[2];
		for (int zk = 0; zk < 2; zk++) {
		zpv[zi][zj][zk] = new double[2];
		zpv[zi][zj][zk] = new double[2]();
		}
		}
		}
		@@ -203,9 +204,10 @@ void sphere() {
		fprintf(output, " VK=%15.7lE, XI IS SCALE FACTOR FOR LENGTHS\n", vk);
		fprintf(output, " \n");
		}
		for (int jxi488 = 1; jxi488 <= sconf->number_of_scales; jxi488++) {
		fprintf(output, "========== JXI =%3d ====================\n", jxi488);
		double xi = sconf->scale_vec[jxi488 - 1];
		for (int jxi488 = 0; jxi488 < sconf->number_of_scales; jxi488++) {
		int jxi = jxi488 + 1;
		fprintf(output, "========== JXI =%3d ====================\n", jxi);
		double xi = sconf->scale_vec[jxi488];
		if (sconf->idfc >= 0) {
		vk = xi * wn;
		vkarg = vk;
		@@ -216,24 +218,25 @@ void sphere() {
		fprintf(output, " XI=%15.7lE\n", xi);
		}
		tppoan.write(reinterpret_cast<char *>(&vk), sizeof(double));
		for (int i132 = 1; i132 <= nsph; i132++) {
		int iogi = sconf->iog_vec[i132 - 1];
		if (iogi != i132) {
		for (int l123 = 1; l123 <= gconf->l_max; l123++) {
		c1->rmi[l123 - 1][i132 - 1] = c1->rmi[l123 - 1][iogi - 1];
		c1->rei[l123 - 1][i132 - 1] = c1->rei[l123 - 1][iogi - 1];
		for (int i132 = 0; i132 < nsph; i132++) {
		int i = i132 + 1;
		int iogi = sconf->iog_vec[i132];
		if (iogi != i) {
		for (int l123 = 0; l123 < gconf->l_max; l123++) {
		c1->rmi[l123][i132] = c1->rmi[l123][iogi - 1];
		c1->rei[l123][i132] = c1->rei[l123][iogi - 1];
		}
		continue; // i132
		}
		int nsh = sconf->nshl_vec[i132 - 1];
		int nsh = sconf->nshl_vec[i132];
		int ici = (nsh + 1) / 2;
		if (sconf->idfc == 0) {
		for (int ic = 0; ic < ici; ic++)
		c2->dc0[ic] = sconf->dc0_matrix[ic][i132 - 1][0]; // WARNING: IDFC=0 is not tested!
		c2->dc0[ic] = sconf->dc0_matrix[ic][i132][0]; // WARNING: IDFC=0 is not tested!
		} else { // IDFC != 0
		if (jxi488 == 1) {
		if (jxi == 1) {
		for (int ic = 0; ic < ici; ic++) {
		c2->dc0[ic] = sconf->dc0_matrix[ic][i132 - 1][jxi488 - 1];
		c2->dc0[ic] = sconf->dc0_matrix[ic][i132][jxi488];
		}
		}
		}
		@@ -241,7 +244,7 @@ void sphere() {
		int jer = 0;
		int lcalc = 0;
		dme(
		gconf->l_max, i132, gconf->npnt, gconf->npntts, vkarg,
		gconf->l_max, i, gconf->npnt, gconf->npntts, vkarg,
		sconf->exdc, exri, c1, c2, jer, lcalc, arg
		);
		if (jer != 0) {
		@@ -252,7 +255,7 @@ void sphere() {
		return;
		}
		} // i132
		if (sconf->idfc >= 0 and nsph == 1 and jxi488 == gconf->jwtm) {
		if (sconf->idfc >= 0 and nsph == 1 and jxi == gconf->jwtm) {
		// This is the condition that writes the transition matrix to output.
		int is = 1111;
		fstream ttms;
		@@ -273,50 +276,51 @@ void sphere() {
		} else { // Failed to open output file. Should never happen.
		printf("ERROR: could not open TTMS file.\n");
		tppoan.close();
		fclose(output);
		}
		}
		double cs0 = 0.25 * vk * vk * vk / half_pi;
		//printf("DEBUG: cs0 = %lE\n", cs0);
		sscr0(tfsas, nsph, gconf->l_max, vk, exri, c1);
		printf("DEBUG: TFSAS = (%lE,%lE)\n", tfsas.real(), tfsas.imag());
		double sqk = vk * vk * sconf->exdc;
		aps(zpv, gconf->l_max, nsph, c1, sqk, gaps);
		rabas(gconf->in_pol, gconf->l_max, nsph, c1, tqse, tqspe, tqss, tqsps);
		for (int i170 = 1; i170 <= nsph; i170++) {
		if (c1->iog[i170 - 1] >= i170) {
		double albeds = c1->sscs[i170 - 1] / c1->sexs[i170 - 1];
		c1->sqscs[i170 - 1] *= sqsfi;
		c1->sqabs[i170 - 1] *= sqsfi;
		c1->sqexs[i170 - 1] *= sqsfi;
		fprintf(output, " SPHERE %2d\n", i170);
		if (c1->nshl[i170 - 1] != 1) {
		fprintf(output, " SIZE=%15.7lE\n", c2->vsz[i170 - 1]);
		for (int i170 = 0; i170 < nsph; i170++) {
		int i = i170 + 1;
		if (c1->iog[i170] >= i) {
		double albeds = c1->sscs[i170] / c1->sexs[i170];
		c1->sqscs[i170] *= sqsfi;
		c1->sqabs[i170] *= sqsfi;
		c1->sqexs[i170] *= sqsfi;
		fprintf(output, " SPHERE %2d\n", i);
		if (c1->nshl[i170] != 1) {
		fprintf(output, " SIZE=%15.7lE\n", c2->vsz[i170]);
		} else {
		fprintf(
		output,
		" SIZE=%15.7lE, REFRACTIVE INDEX=(%15.7lE,%15.7lE)\n",
		c2->vsz[i170 -1],
		c2->vkt[i170 - 1].real(),
		c2->vkt[i170 - 1].imag()
		c2->vsz[i170],
		c2->vkt[i170].real(),
		c2->vkt[i170].imag()
		);
		}
		fprintf(output, " ----- SCS ----- ABS ----- EXS ----- ALBEDS --\n");
		fprintf(
		output,
		" %14.7lE%15.7lE%15.7lE%15.7lE\n",
		c1->sscs[i170 - 1], c1->sabs[i170 - 1],
		c1->sexs[i170 - 1], albeds
		c1->sscs[i170], c1->sabs[i170],
		c1->sexs[i170], albeds
		);
		fprintf(output, " ---- SCS/GS -- ABS/GS -- EXS/GS ---\n");
		fprintf(
		output,
		" %14.7lE%15.7lE%15.7lE\n",
		c1->sqscs[i170 - 1], c1->sqabs[i170 - 1],
		c1->sqexs[i170 - 1]
		c1->sqscs[i170], c1->sqabs[i170],
		c1->sqexs[i170]
		);
		fprintf(output, " FSAS=%15.7lE%15.7lE\n", c1->fsas[i170 - 1].real(), c1->fsas[i170 - 1].imag());
		double csch = 2.0 * vk * sqsfi / c1->gcsv[i170 -1];
		s0 = c1->fsas[i170 - 1] * exri;
		fprintf(output, " FSAS=%15.7lE%15.7lE\n", c1->fsas[i170].real(), c1->fsas[i170].imag());
		double csch = 2.0 * vk * sqsfi / c1->gcsv[i170];
		s0 = c1->fsas[i170] * exri;
		double qschu = csch * s0.imag();
		double pschu = csch * s0.real();
		double s0mag = cs0 * abs(s0);
		@@ -325,32 +329,32 @@ void sphere() {
		" QSCHU=%15.7lE, PSCHU=%15.7lE, S0MAG=%15.7lE\n",
		qschu, pschu, s0mag
		);
		double rapr = c1->sexs[i170 - 1] - gaps[i170 - 1];
		double cosav = gaps[i170 - 1] / c1->sscs[i170 - 1];
		double rapr = c1->sexs[i170] - gaps[i170];
		double cosav = gaps[i170] / c1->sscs[i170];
		fprintf(output, " COSAV=%15.7lE, RAPRS=%15.7lE\n", cosav, rapr);
		fprintf(output, " IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 1, tqse[0][i170 - 1], tqss[0][i170 - 1]);
		fprintf(output, " IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 2, tqse[1][i170 - 1], tqss[1][i170 - 1]);
		tppoan.write(reinterpret_cast<char *>(&(tqse[0][i170 - 1])), sizeof(double));
		tppoan.write(reinterpret_cast<char *>(&(tqss[0][i170 - 1])), sizeof(double));
		double val = tqspe[0][i170 - 1].real();
		fprintf(output, " IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 1, tqse[0][i170], tqss[0][i170]);
		fprintf(output, " IPO=%2d, TQEk=%15.7lE, TQSk=%15.7lE\n", 2, tqse[1][i170], tqss[1][i170]);
		tppoan.write(reinterpret_cast<char *>(&(tqse[0][i170])), sizeof(double));
		tppoan.write(reinterpret_cast<char *>(&(tqss[0][i170])), sizeof(double));
		double val = tqspe[0][i170].real();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		val = tqspe[0][i170 - 1].imag();
		val = tqspe[0][i170].imag();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		val = tqsps[0][i170 - 1].real();
		val = tqsps[0][i170].real();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		val = tqsps[0][i170 - 1].imag();
		val = tqsps[0][i170].imag();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		tppoan.write(reinterpret_cast<char *>(&(tqse[1][i170 - 1])), sizeof(double));
		tppoan.write(reinterpret_cast<char *>(&(tqss[1][i170 - 1])), sizeof(double));
		val = tqspe[1][i170 - 1].real();
		tppoan.write(reinterpret_cast<char *>(&(tqse[1][i170])), sizeof(double));
		tppoan.write(reinterpret_cast<char *>(&(tqss[1][i170])), sizeof(double));
		val = tqspe[1][i170].real();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		val = tqspe[1][i170 - 1].imag();
		val = tqspe[1][i170].imag();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		val = tqsps[1][i170 - 1].real();
		val = tqsps[1][i170].real();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		val = tqsps[1][i170 - 1].imag();
		val = tqsps[1][i170].imag();
		tppoan.write(reinterpret_cast<char *>(&val), sizeof(double));
		} // End if iog[i170 - 1] >= i170
		} // End if iog[i170] >= i
		} // i170 loop
		if (nsph != 1) {
		fprintf(output, " FSAT=(%15.7lE,%15.7lE)\n", tfsas.real(), tfsas.imag());
		@@ -366,10 +370,12 @@ void sphere() {
		);
		}
		th = th1;
		for (int jth486 = 1; jth486 <= nth; jth486++) { // OpenMP parallelizable section
		for (int jth486 = 0; jth486 < nth; jth486++) { // OpenMP parallelizable section
		int jth = jth486 + 1;
		ph = ph1;
		for (int jph484 = 1; jph484 <= nph; jph484++) {
		bool goto182 = (nk == 1) && (jxi488 > 1);
		for (int jph484 = 0; jph484 < nph; jph484++) {
		int jph = jph484 + 1;
		bool goto182 = (nk == 1) && (jxi > 1);
		if (!goto182) {
		upvmp(th, ph, 0, cost, sint, cosp, sinp, u, upmp, unmp);
		}
		@@ -385,7 +391,8 @@ void sphere() {
		}
		double thsl = ths1;
		double phsph = 0.0;
		for (int jths482 = 1; jths482 <= nths; jths482++) {
		for (int jths482 = 0; jths482 < nths; jths482++) {
		int jths = jths482 + 1;
		double ths = thsl;
		int icspnv = 0;
		if (gconf->meridional_type > 1) ths = th + thsca;
		@@ -397,19 +404,20 @@ void sphere() {
		if (phsph != 0.0) icspnv = 1;
		}
		double phs = phs1;
		for (int jphs480 = 1; jphs480 <= nphs; jphs480++) {
		for (int jphs480 = 0; jphs480 < nphs; jphs480++) {
		int jphs = jphs480 + 1;
		if (gconf->meridional_type >= 1) {
		phs = ph + phsph;
		if (phs >= 360.0) phs -= 360.0;
		}
		bool goto190 = (nks == 1) && ((jxi488 > 1) \|\| (jth486 > 1) \|\| (jph484 > 1));
		bool goto190 = (nks == 1) && ((jxi > 1) \|\| (jth > 1) \|\| (jph > 1));
		if (!goto190) {
		upvmp(ths, phs, icspnv, costs, sints, cosps, sinps, us, upsmp, unsmp);
		if (gconf->meridional_type >= 0) {
		wmamp(2, costs, sints, cosps, sinps, gconf->in_pol, gconf->l_max, 0, nsph, args, us, upsmp, unsmp, c1);
		}
		}
		if (nkks != 0 \|\| jxi488 == 1) {
		if (nkks != 0 \|\| jxi == 1) {
		upvsp(u, upmp, unmp, us, upsmp, unsmp, up, un, ups, uns, duk, isq, ibf, scan, cfmp, sfmp, cfsp, sfsp);
		if (gconf->meridional_type < 0) {
		wmasp(
		@@ -418,9 +426,9 @@ void sphere() {
		gconf->l_max, 0, nsph, argi, args, c1
		);
		}
		for (int i193 = 1; i193 <= 3; i193++) {
		un[i193 - 1] = unmp[i193 - 1];
		uns[i193 - 1] = unsmp[i193 - 1];
		for (int i193 = 0; i193 < 3; i193++) {
		un[i193] = unmp[i193];
		uns[i193] = unsmp[i193];
		}
		}
		if (gconf->meridional_type < 0) jw = 1;
		@@ -438,7 +446,7 @@ void sphere() {
		fprintf(
		output,
		"******** JTH =%3d, JPH =%3d, JTHS =%3d, JPHS =%3d ******************\n",
		jth486, jph484, jths482, jphs480
		jth, jph, jths, jphs
		);
		fprintf(
		output,
		@@ -455,25 +463,25 @@ void sphere() {
		fprintf(output, " UN=(%12.5lE,%12.5lE,%12.5lE)\n", un[0], un[1], un[2]);
		}
		sscr2(nsph, gconf->l_max, vk, exri, c1);
		for (int ns226 = 1; ns226 <= nsph; ns226++) {
		fprintf(output, " SPHERE %2d\n", ns226);
		for (int ns226 = 0; ns226 < nsph; ns226++) {
		int ns = ns226 + 1;
		fprintf(output, " SPHERE %2d\n", ns);
		fprintf(
		output, " SAS(1,1)=%15.7lE%15.7lE, SAS(2,1)=%15.7lE%15.7lE\n",
		c1->sas[ns226 - 1][0][0].real(), c1->sas[ns226 - 1][0][0].imag(),
		c1->sas[ns226 - 1][1][0].real(), c1->sas[ns226 - 1][1][0].imag()
		c1->sas[ns226][0][0].real(), c1->sas[ns226][0][0].imag(),
		c1->sas[ns226][1][0].real(), c1->sas[ns226][1][0].imag()
		);
		fprintf(
		output, " SAS(1,2)=%15.7lE%15.7lE, SAS(2,2)=%15.7lE%15.7lE\n",
		c1->sas[ns226 - 1][0][1].real(), c1->sas[ns226 - 1][0][1].imag(),
		c1->sas[ns226 - 1][1][1].real(), c1->sas[ns226 - 1][1][1].imag()
		c1->sas[ns226][0][1].real(), c1->sas[ns226][0][1].imag(),
		c1->sas[ns226][1][1].real(), c1->sas[ns226][1][1].imag()
		);
		if (jths482 == 1 && jphs480 == 1)
		if (jths == 1 && jphs == 1)
		fprintf(
		output, " Fx=%15.7lE, Fy=%15.7lE, Fz=%15.7lE\n",
		frx, fry, frz
		);
		for (int i225 = 0; i225 < 16; i225++)
		vint[i225] = c1->vints[ns226 - 1][i225];
		for (int i225 = 0; i225 < 16; i225++) vint[i225] = c1->vints[ns226][i225];
		mmulc(vint, cmullr, cmul);
		fprintf(output, " MULS\n ");
		for (int imul = 0; imul < 4; imul++) {
		@@ -519,6 +527,16 @@ void sphere() {
		fclose(output);
		delete c1;
		delete c2;
		for (int zi = gconf->l_max - 1; zi > -1; zi--) {
		for (int zj = 0; zj < 3; zj++) {
		for (int zk = 0; zk < 2; zk++) {
		delete[] zpv[zi][zj][zk];
		}
		delete[] zpv[zi][zj];
		}
		delete[] zpv[zi];
		}
		delete[] zpv;
		delete[] duk;
		delete[] u;
		delete[] us;
		@@ -534,7 +552,7 @@ void sphere() {
		delete[] argi;
		delete[] args;
		delete[] gaps;
		for (int i = 0; i < 4; i++) {
		for (int i = 3; i > -1; i--) {
		delete[] cmul[i];
		delete[] cmullr[i];
		}