Drop unified shared memory requirement and enable GPU offload in TRAPPING (938cbc92) · Commits · Giacomo Mulas / NP_TMcode

src/libnptm/clu_subs.cpp

+3 −3

Original line number	Diff line number	Diff line
		@@ -47,9 +47,9 @@
		#include <omp.h>
		#endif

		-#ifdef USE_TARGET_OFFLOAD
		-#pragma omp requires unified_shared_memory
		-#endif
		+// #ifdef USE_TARGET_OFFLOAD
		+// #pragma omp requires unified_shared_memory
		+// #endif

		using namespace std;

+13 −13

Original line number	Diff line number	Diff line
		@@ -65,10 +65,6 @@
		#include <omp.h>
		#endif

		-#ifdef USE_TARGET_OFFLOAD
		-#pragma omp requires unified_shared_memory
		-#endif

		using namespace std;

		/*! \brief C++ implementation of FRFME
		@@ -400,9 +396,13 @@ void frfme(string data_file, string output_path) {
		nvtxRangePush("j80 loop");
		#endif
		dcomplex *vec_wsum = tfrfme->wsum[0];
		int size_wsum = nlmmt * nrvc;
		double *vec_vkzm = vkzm[0];
		int size_vkzm = nkv * nkv;
		const dcomplex *vec_tt1_wk = tt1->wk;
		int size_tt1_wk = nkv * nkv * nlmmt;
		#ifdef USE_TARGET_OFFLOAD
		-#pragma omp target teams distribute parallel for simd
		+#pragma omp target teams distribute parallel for simd map(tofrom: vec_wsum[0:size_wsum]) map(to:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv], jlmf, jlml, nkv, nlmmt, nrvc, nxv, nyv, nzv, frsh, uim, delks)
		#else
		#pragma omp parallel for simd
		#endif
		@@ -415,7 +415,7 @@ void frfme(string data_file, string output_path) {
		int wk_index = 0;
		for (int jxy50 = 0; jxy50 < nkvs; jxy50++) {
		wk_index = nlmmt * jxy50;
		-wk_value = tt1->wk[wk_index + j80];
		+wk_value = vec_tt1_wk[wk_index + j80];
		int jy50 = jxy50 / nkv;
		int jx50 = jxy50 % nkv;
		vec_w[(nkv*jx50) + jy50] = wk_value;