Merge branch 'master' into parallel_trapping to update to trapping offload (2486e34c) · Commits · Giacomo Mulas / NP_TMcode

.gitlab-ci.yml

+3 −3

Original line number	Original line	Diff line number	Diff line
	@@ -78,13 +78,13 @@ compatibility_stage:
	- CXX=g++-14 FC=gfortran-14 ./configure		- CXX=g++-14 FC=gfortran-14 ./configure
	- make wipe		- make wipe
	- make -j		- make -j
	- echo "Running make with refinement with gnu compilers version 14..."		- echo "Running make with gnu compilers version 14..."
	- cd ..		- cd ..
	- rm -rf build_gnu14		- rm -rf build_gnu14
	- mkdir build_gnu14_refine		- mkdir build_gnu14_refine
	- cd build_gnu14_refine		- cd build_gnu14_refine
	- cp -r ../build/* .		- cp -r ../build/* .
	- CXX=g++-14 FC=gfortran-14 ./configure --enable-refinement		- CXX=g++-14 FC=gfortran-14 ./configure
	- make wipe		- make wipe
	- make -j		- make -j
	#- echo "Running make with flang version 16 and clang version 16..."		#- echo "Running make with flang version 16 and clang version 16..."
	@@ -173,7 +173,7 @@ building_stage:
	- cat /etc/os-release		- cat /etc/os-release
	- cd build		- cd build
	- echo "Configuring with default compilers (MAGMA disabled)..."		- echo "Configuring with default compilers (MAGMA disabled)..."
	- ./configure --without-magma --without-cublas --disable-offload --enable-refinement --enable-shared		- ./configure --without-magma --without-cublas --disable-offload --enable-shared
	- make wipe		- make wipe
	- echo "Building the default configuration..."		- echo "Building the default configuration..."
	- make -j		- make -j

src/libnptm/clu_subs.cpp

+87 −52

Original line number	Original line	Diff line number	Diff line
	@@ -1341,6 +1341,8 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) {
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)		#pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)
			#else
			#pragma omp parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4)
	#endif		#endif
	for (int i12 = 0; i12 < nlemt; i12++) {		for (int i12 = 0; i12 < nlemt; i12++) {
	// int i = i12 - 1;		// int i = i12 - 1;
	@@ -1408,6 +1410,8 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) {
	sum3 = cc0;		sum3 = cc0;
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3)		#pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3)
			#else
			#pragma omp parallel for simd reduction(+:sum2,sum3)
	#endif		#endif
	for (int i14 = 0; i14 < c1->nlem; i14++) {		for (int i14 = 0; i14 < c1->nlem; i14++) {
	int ie = i14 + c1->nlem;		int ie = i14 + c1->nlem;
	@@ -1418,6 +1422,8 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) {
	dcomplex sumpd = cc0;		dcomplex sumpd = cc0;
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd)		#pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd)
			#else
			#pragma omp parallel for simd collapse(2) reduction(+:sumpi,sumpd)
	#endif		#endif
	for (int i16 = 0; i16 < nlemt; i16++) {		for (int i16 = 0; i16 < nlemt; i16++) {
	for (int j16 = 0; j16 < c1->nlem; j16++) {		for (int j16 = 0; j16 < c1->nlem; j16++) {
	@@ -2001,6 +2007,8 @@ void raba(
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(+:c1, c2)		#pragma omp target teams distribute parallel for simd reduction(+:c1, c2)
			#else
			#pragma omp parallel for simd reduction(+:c1, c2)
	#endif		#endif
	for (int j10 = 1; j10 <= nlemt; j10++) {		for (int j10 = 1; j10 <= nlemt; j10++) {
	int j = j10 - 1;		int j = j10 - 1;
	@@ -2021,6 +2029,8 @@ void raba(
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp teams distribute parallel for		#pragma omp teams distribute parallel for
			#else
			#pragma omp parallel for
	#endif		#endif
	for (int ipo = 0; ipo < 2; ipo++) {		for (int ipo = 0; ipo < 2; ipo++) {
	int jpo = 1 - ipo;		int jpo = 1 - ipo;
	@@ -2055,6 +2065,8 @@ void raba(
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)		#pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)
			#else
			#pragma omp parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2)
	#endif		#endif
	for (int k = 1; k<=kmax; k++) {		for (int k = 1; k<=kmax; k++) {
	int l60 = (int) sqrt(k+1);		int l60 = (int) sqrt(k+1);
	@@ -2129,7 +2141,9 @@ void raba(
	nvtxRangePush("raba loop 3");		nvtxRangePush("raba loop 3");
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp teams distribute parallel for simd		#pragma omp target teams distribute parallel for simd
			#else
			#pragma omp parallel for simd
	#endif		#endif
	for (int ipo78 = 1; ipo78 <= 2; ipo78++) {		for (int ipo78 = 1; ipo78 <= 2; ipo78++) {
	int ipo = ipo78 - 1;		int ipo = ipo78 - 1;
	@@ -2202,6 +2216,8 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) {
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(+:sums, sum21)		#pragma omp target teams distribute parallel for simd reduction(+:sums, sum21)
			#else
			#pragma omp parallel for simd reduction(+:sums, sum21)
	#endif		#endif
	for (int l10 = 1; l10 <= c1->li; l10++) {		for (int l10 = 1; l10 <= c1->li; l10++) {
	double fl = 1.0 * (l10 + l10 + 1);		double fl = 1.0 * (l10 + l10 + 1);
	@@ -2248,6 +2264,8 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) {
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas)		#pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas)
			#else
			#pragma omp parallel for simd reduction(+:scs, ecs, acs, tfsas)
	#endif		#endif
	for (int i14 = 1; i14 <= c1->nsph; i14++) {		for (int i14 = 1; i14 <= c1->nsph; i14++) {
	int iogi = c1->iog[i14 - 1];		int iogi = c1->iog[i14 - 1];
	@@ -2312,6 +2330,8 @@ void scr2(
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(-:s11, s21, s12, s22)		#pragma omp target teams distribute parallel for simd reduction(-:s11, s21, s12, s22)
			#else
			#pragma omp parallel for simd reduction(-:s11, s21, s12, s22)
	#endif		#endif
	for (int k = 1; k<=kmax; k++) {		for (int k = 1; k<=kmax; k++) {
	int l10 = (int) sqrt(k+1);		int l10 = (int) sqrt(k+1);
	@@ -2366,6 +2386,8 @@ void scr2(
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)		#pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
			#else
			#pragma omp parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11)
	#endif		#endif
	for (int i14 = 1; i14 <= c1->nsph; i14++) {		for (int i14 = 1; i14 <= c1->nsph; i14++) {
	int i = i14 - 1;		int i = i14 - 1;
	@@ -2398,6 +2420,8 @@ void scr2(
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd collapse(4)		#pragma omp target teams distribute parallel for simd collapse(4)
			#else
			#pragma omp parallel for simd collapse(4)
	#endif		#endif
	for (int ipo1 = 1; ipo1 <=2; ipo1++) {		for (int ipo1 = 1; ipo1 <=2; ipo1++) {
	for (int jpo1 = 1; jpo1 <= 2; jpo1++) {		for (int jpo1 = 1; jpo1 <= 2; jpo1++) {
	@@ -2422,7 +2446,9 @@ void scr2(
	nvtxRangePush("scr2 loop 4");		nvtxRangePush("scr2 loop 4");
	#endif		#endif
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target parallel for collapse(4)		#pragma omp target teams distribute parallel for collapse(4)
			#else
			#pragma omp parallel for collapse(4)
	#endif		#endif
	for (int ipo1 = 1; ipo1 <=2; ipo1++) {		for (int ipo1 = 1; ipo1 <=2; ipo1++) {
	for (int jpo1 = 1; jpo1 <= 2; jpo1++) {		for (int jpo1 = 1; jpo1 <= 2; jpo1++) {
	@@ -2534,7 +2560,7 @@ void tqr(
	}		}

	void ztm(dcomplex *am, ParticleDescriptor c1) {		void ztm(dcomplex *am, ParticleDescriptor c1) {
	dcomplex gie, gle, a1, a2, a3, a4, sum1, sum2, sum3, sum4;		// dcomplex gie, gle, a1, a2, a3, a4, sum1, sum2, sum3, sum4;
	const dcomplex cc0 = 0.0 + 0.0 * I;		const dcomplex cc0 = 0.0 + 0.0 * I;
	// int i2 = 0; // old implementation		// int i2 = 0; // old implementation
	#ifdef USE_NVTX		#ifdef USE_NVTX
	@@ -2546,7 +2572,6 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	// C9 c9_para = new C9(c9);		// C9 c9_para = new C9(c9);
	dcomplex *gis_v = c1->gis[0];		dcomplex *gis_v = c1->gis[0];
	dcomplex *gls_v = c1->gls[0];		dcomplex *gls_v = c1->gls[0];
	double rac3j_local = (double ) malloc(c1->lmtpo*sizeof(double));
	int k2max = c1->li*(c1->li+2);		int k2max = c1->li*(c1->li+2);
	int k3max = c1->le*(c1->le+2);		int k3max = c1->le*(c1->le+2);
	// To parallelise, I run a linearised loop directly over k		// To parallelise, I run a linearised loop directly over k
	@@ -2559,6 +2584,8 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	// im = im -(2*l+1) and l = l+1 (there was a rounding error in a nearly exact root)		// im = im -(2*l+1) and l = l+1 (there was a rounding error in a nearly exact root)
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd collapse(3)		#pragma omp target teams distribute parallel for simd collapse(3)
			#else
			#pragma omp parallel for simd collapse(3)
	#endif		#endif
	for (int n2 = 1; n2 <= c1->nsph; n2++) { // GPU portable?		for (int n2 = 1; n2 <= c1->nsph; n2++) { // GPU portable?
	for (int k2 = 1; k2<=k2max; k2++) {		for (int k2 = 1; k2<=k2max; k2++) {
	@@ -2590,12 +2617,13 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	int i3 = l3 * l3 + im3 - 1;		int i3 = l3 * l3 + im3 - 1;
	int m3 = -l3 - 1 + im3;		int m3 = -l3 - 1 + im3;
	int vecindex = (i2 - 1) * c1->nlem + i3 - 1;		int vecindex = (i2 - 1) * c1->nlem + i3 - 1;
			double rac3j_local = (double ) malloc(c1->lmtpo*sizeof(double));
	gis_v[vecindex] = ghit_d(2, 0, n2, l2, m2, l3, m3, c1, rac3j_local);		gis_v[vecindex] = ghit_d(2, 0, n2, l2, m2, l3, m3, c1, rac3j_local);
	gls_v[vecindex] = ghit_d(2, 1, n2, l2, m2, l3, m3, c1, rac3j_local);		gls_v[vecindex] = ghit_d(2, 1, n2, l2, m2, l3, m3, c1, rac3j_local);
			free(rac3j_local);
	} // close k3 loop, former l3 + im3 loops		} // close k3 loop, former l3 + im3 loops
	} // close k2 loop, former l2 + im2 loops		} // close k2 loop, former l2 + im2 loops
	} // close n2 loop		} // close n2 loop
	free(rac3j_local);
	#ifdef USE_NVTX		#ifdef USE_NVTX
	nvtxRangePop();		nvtxRangePop();
	#endif		#endif
	@@ -2606,6 +2634,8 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	dcomplex *sam_v = c1->sam[0];		dcomplex *sam_v = c1->sam[0];
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target teams distribute parallel for simd collapse(2)		#pragma omp target teams distribute parallel for simd collapse(2)
			#else
			#pragma omp parallel for simd collapse(2)
	#endif		#endif
	for (int i1 = 1; i1 <= c1->ndi; i1++) { // GPU portable?		for (int i1 = 1; i1 <= c1->ndi; i1++) { // GPU portable?
	for (int i3 = 1; i3 <= c1->nlem; i3++) {		for (int i3 = 1; i3 <= c1->nlem; i3++) {
	@@ -2615,7 +2645,6 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	dcomplex sum4 = cc0;		dcomplex sum4 = cc0;
	int i1e = i1 + c1->ndi;		int i1e = i1 + c1->ndi;
	int i3e = i3 + c1->nlem;		int i3e = i3 + c1->nlem;
	#pragma parallel for simd reduction(+:sum1,sum2,sum3,sum4)
	for (int i2 = 1; i2 <= c1->ndi; i2++) {		for (int i2 = 1; i2 <= c1->ndi; i2++) {
	int i2e = i2 + c1->ndi;		int i2e = i2 + c1->ndi;
	int vecindg_23 = (i2 - 1) * c1->nlem + i3 - 1;		int vecindg_23 = (i2 - 1) * c1->nlem + i3 - 1;
	@@ -2640,7 +2669,11 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	sam_v[vecind1e + i3e - 1] = sum4;		sam_v[vecind1e + i3e - 1] = sum4;
	} // i3 loop		} // i3 loop
	} // i1 loop		} // i1 loop
	#pragma omp parallel for collapse(2)		#ifdef USE_TARGET_OFFLOAD
			#pragma omp target teams distribute parallel for simd collapse(2)
			#else
			#pragma omp parallel for simd collapse(2)
			#endif
	for (int i1 = 1; i1 <= c1->ndi; i1++) {		for (int i1 = 1; i1 <= c1->ndi; i1++) {
	for (int i0 = 1; i0 <= c1->nlem; i0++) {		for (int i0 = 1; i0 <= c1->nlem; i0++) {
	int vecindex = (i1 - 1) * c1->nlem + i0 - 1;		int vecindex = (i1 - 1) * c1->nlem + i0 - 1;
	@@ -2650,7 +2683,9 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	} // i1 loop		} // i1 loop
	dcomplex *vec_am0m = c1->am0m[0];		dcomplex *vec_am0m = c1->am0m[0];
	#ifdef USE_TARGET_OFFLOAD		#ifdef USE_TARGET_OFFLOAD
	#pragma omp target parallel for collapse(2)		#pragma omp target teams distribute parallel for simd collapse(2)
			#else
			#pragma omp parallel for simd collapse(2)
	#endif		#endif
	for (int i0 = 1; i0 <= c1->nlem; i0++) {		for (int i0 = 1; i0 <= c1->nlem; i0++) {
	for (int i3 = 1; i3 <= c1->nlemt; i3++) {		for (int i3 = 1; i3 <= c1->nlemt; i3++) {
	@@ -2661,11 +2696,11 @@ void ztm(dcomplex *am, ParticleDescriptor c1) {
	int i1e = i1 + c1->ndi;		int i1e = i1 + c1->ndi;
	int vecind1 = (i1 - 1) * c1->nlemt;		int vecind1 = (i1 - 1) * c1->nlemt;
	int vecind1e = (i1e - 1) * c1->nlemt;		int vecind1e = (i1e - 1) * c1->nlemt;
	a1 = sam_v[vecind1 + i3 - 1];		dcomplex a1 = sam_v[vecind1 + i3 - 1];
	a2 = sam_v[vecind1e + i3 - 1];		dcomplex a2 = sam_v[vecind1e + i3 - 1];
	int vecindex = (i1 - 1) * c1->nlem + i0 - 1;		int vecindex = (i1 - 1) * c1->nlem + i0 - 1;
	gie = gis_v[vecindex];		dcomplex gie = gis_v[vecindex];
	gle = gls_v[vecindex];		dcomplex gle = gls_v[vecindex];
	sum1 += (a1 * gie + a2 * gle);		sum1 += (a1 * gie + a2 * gle);
	sum2 += (a1 * gle + a2 * gie);		sum2 += (a1 * gle + a2 * gie);
	} // i1 loop		} // i1 loop

src/scripts/model_maker.py

+5 −3

Original line number	Original line	Diff line number	Diff line
	@@ -328,6 +328,8 @@ def load_model(model_file):
	for j in range(expected_radii):		for j in range(expected_radii):
	sconf['rcf'][i][j] = float(model['particle_settings']['rad_frac'][i][j])		sconf['rcf'][i][j] = float(model['particle_settings']['rad_frac'][i][j])
	# Create the gconf dict		# Create the gconf dict
			use_refinement = False
			dyn_orders = True
	try:		try:
	use_refinement = False if model['system_settings']['refinement'] == "0" else True		use_refinement = False if model['system_settings']['refinement'] == "0" else True
	except KeyError:		except KeyError:
	@@ -335,7 +337,7 @@ def load_model(model_file):
	try:		try:
	dyn_orders = False if model['radiation_settings']['dyn_orders'] == "0" else True		dyn_orders = False if model['radiation_settings']['dyn_orders'] == "0" else True
	except KeyError:		except KeyError:
	use_refinement = False		dyn_orders = True
	str_polar = model['radiation_settings']['polarization']		str_polar = model['radiation_settings']['polarization']
	if (str_polar not in ["LINEAR", "CIRCULAR"]):		if (str_polar not in ["LINEAR", "CIRCULAR"]):
	print("ERROR: %s is not a recognized polarization state."%str_polar)		print("ERROR: %s is not a recognized polarization state."%str_polar)
	@@ -875,10 +877,10 @@ def write_legacy_gconf(conf):
	output.write(str_line)		output.write(str_line)
	str_line = " {0:d}\n0\n".format(conf['jwtm'])		str_line = " {0:d}\n0\n".format(conf['jwtm'])
	output.write(str_line)		output.write(str_line)
	if (gconf['use_refinement']):		if (conf['use_refinement']):
	str_line = "USE_REFINEMENT=1\n"		str_line = "USE_REFINEMENT=1\n"
	output.write(str_line)		output.write(str_line)
	if (not gconf['dyn_orders']):		if (not conf['dyn_orders']):
	str_line = "USE_DYN_ORDER=0\n"		str_line = "USE_DYN_ORDER=0\n"
	output.write(str_line)		output.write(str_line)
	output.close()		output.close()

src/trapping/cfrfme.cpp

+57 −43

Original line number	Original line	Diff line number	Diff line
	@@ -61,6 +61,14 @@
	#include <nvtx3/nvToolsExt.h>		#include <nvtx3/nvToolsExt.h>
	#endif		#endif

			#ifdef _OPENMP
			#include <omp.h>
			#endif

			#ifdef USE_TARGET_OFFLOAD
			#pragma omp requires unified_shared_memory
			#endif

	using namespace std;		using namespace std;

	/*! \brief C++ implementation of FRFME		/*! \brief C++ implementation of FRFME
	@@ -391,58 +399,64 @@ void frfme(string data_file, string output_path) {
	#ifdef USE_NVTX		#ifdef USE_NVTX
	nvtxRangePush("j80 loop");		nvtxRangePush("j80 loop");
	#endif		#endif
	#pragma omp parallel for		dcomplex *vec_wsum = tfrfme->wsum[0];
	for (int j80 = jlmf; j80 <= jlml; j80++) {		double *vec_vkzm = vkzm[0];
	dcomplex vec_w = new dcomplex[nkv nkv]();		#ifdef USE_TARGET_OFFLOAD
	dcomplex *w = new dcomplex[nkv];		#pragma omp target teams distribute parallel for simd
			#else
			#pragma omp parallel for simd
			#endif
			for (int j80 = jlmf-1; j80 < jlml; j80++) {
			int nkvs = nkv * nkv;
			dcomplex vec_w = (dcomplex ) calloc(nkvs, sizeof(dcomplex));
			dcomplex w = (dcomplex ) calloc(nkv, sizeof(dcomplex *));
	for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;		for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv;
	dcomplex wk_value;		dcomplex wk_value;
	for (int jy50 = 0; jy50 < nkv; jy50++) {		int wk_index = 0;
	for (int jx50 = 0; jx50 < nkv; jx50++) {		for (int jxy50 = 0; jxy50 < nkvs; jxy50++) {
	wk_value = tt1->wk[jy50 * nkv * nlmmt + jx50 * nlmmt + j80 - 1];		wk_index = nlmmt*jxy50;
	w[jx50][jy50] = wk_value;		wk_value = tt1->wk[wk_index + j80];
	} // jx50		int jy50 = jxy50 / nkv;
	} // jy50 loop		int jx50 = jxy50 % nkv;
			vec_w[(nkv*jx50) + jy50] = wk_value;
			} // jxy50 loop
	int ixyz = 0;		int ixyz = 0;
	for (int iz75 = 0; iz75 < nzv; iz75++) {		for (int wj = 0; wj < nrvc; wj++) vec_wsum[(j80*nrvc)+wj] = cc0;
			int nvtot = nxvnyvnzv;
			int nvxy = nxv *nyv;
			for (int ixyz = 0; ixyz < nvtot; ixyz++) {
			int iz75 = ixyz / nvxy;
			int iy70 = (ixyz % nvxy) / nxv;
			int ix65 = ixyz % nxv;
	double z = _zv[iz75] + frsh;		double z = _zv[iz75] + frsh;
	for (int iy70 = 0; iy70 < nyv; iy70++) {
	double y = _yv[iy70];		double y = _yv[iy70];
	for (int ix65 = 0; ix65 < nxv; ix65++) {
	double x = _xv[ix65];		double x = _xv[ix65];
	ixyz++;
	dcomplex sumy = cc0;		dcomplex sumy = cc0;
	for (int jy60 = 0; jy60 < nkv; jy60++) {		for (int jy60x55 = 0; jy60x55 < nkvs ; jy60x55++) {
			int jy60 = jy60x55 / nkv;
			int jx55 = jy60x55 % nkv;
	double vky = vkv[jy60];		double vky = vkv[jy60];
	double vkx = vkv[nkv - 1];		double vkx = (jx55==0) ? vkv[nkv - 1] : vkv[jx55];
	double vkzf = vkzm[0][jy60];		double vkz = vec_vkzm[(jx55*nkv)+jy60];
	dcomplex phasf = cexp(uim * (-vkx * x + vky * y + vkzf * z));		dcomplex phas = (jx55==0) ?
	double vkzl = vkzm[nkv - 1][jy60];		cexp(uim * (-vkx * x + vky * y + vkz * z)):
	dcomplex phasl = cexp(uim * (vkx * x + vky * y + vkzl * z));		cexp(uim * (vkx * x + vky * y + vkz * z));
	dcomplex sumx = 0.5 * (w[0][jy60] * phasf + w[nkv - 1][jy60] * phasl);		dcomplex sumx = vec_w[(jx55nkv)+jy60] phas;
	for (int jx55 = 2; jx55 <= nks; jx55++) {		double factor1 = ((jx55==0)\|\|(jx55==(nkv-1))) ? 0.5 : 1.0;
	vkx = vkv[jx55 - 1];		double factor2 = ((jy60==0)\|\|(jy60==(nkv-1))) ? 0.5 : 1.0;
	double vkz = vkzm[jx55 - 1][jy60];		sumx = factor1factor2;
	dcomplex phas = cexp(uim * (vkx * x + vky * y + vkz * z));
	sumx += (w[jx55 - 1][jy60] * phas);
	} // jx55 loop
	if (jy60 == 0 \|\| jy60 == nkv - 1) sumx *= 0.5;
	sumy += sumx;		sumy += sumx;
	} // jy60 loop		} // jy60x55 loop
	tfrfme->wsum[j80 - 1][ixyz - 1] = sumy * delks;		vec_wsum[((j80)nrvc)+ixyz] = sumy delks;
	} // ix65 loop		} // ixyz loop
	} // iy70 loop		free(vec_w);
	} // iz75 loop		free(w);
	delete[] vec_w;
	delete[] w;
	} // j80 loop		} // j80 loop
	#ifdef USE_NVTX		#ifdef USE_NVTX
	nvtxRangePop();		nvtxRangePop();
	#endif
	// label 88
	#ifdef USE_NVTX
	nvtxRangePush("Closing operations");		nvtxRangePush("Closing operations");
	#endif		#endif
			// label 88
	tfrfme->write_binary(tfrfme_name, "HDF5");		tfrfme->write_binary(tfrfme_name, "HDF5");
	string output_name = output_path + "/c_OFRFME";		string output_name = output_path + "/c_OFRFME";
	FILE *output = fopen(output_name.c_str(), "w");		FILE *output = fopen(output_name.c_str(), "w");

build/configure.sh

+1 −1

File changed.

Contains only whitespace changes.