Commit 938cbc92 authored by Giovanni La Mura
Browse files

Drop unified shared memory requirement and enable GPU offload in TRAPPING

parent 72c6db18
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -47,9 +47,9 @@
#include <omp.h>
#endif

#ifdef USE_TARGET_OFFLOAD
#pragma omp requires unified_shared_memory
#endif
// #ifdef USE_TARGET_OFFLOAD
// #pragma omp requires unified_shared_memory
// #endif

using namespace std;

+13 −13
Original line number Diff line number Diff line
@@ -65,10 +65,6 @@
#include <omp.h>
#endif

#ifdef USE_TARGET_OFFLOAD
#pragma omp requires unified_shared_memory
#endif

using namespace std;

/*! \brief C++ implementation of FRFME
@@ -400,9 +396,13 @@ void frfme(string data_file, string output_path) {
	  nvtxRangePush("j80 loop");
#endif
	  dcomplex *vec_wsum = tfrfme->wsum[0];
	  int size_wsum = nlmmt * nrvc;
	  double *vec_vkzm = vkzm[0];
	  int size_vkzm = nkv * nkv;
	  const dcomplex *vec_tt1_wk = tt1->wk;
	  int size_tt1_wk = nkv * nkv * nlmmt;
#ifdef USE_TARGET_OFFLOAD
#pragma omp target teams distribute parallel for simd
#pragma omp target teams distribute parallel for simd map(tofrom: vec_wsum[0:size_wsum]) map(to:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv], jlmf, jlml, nkv, nlmmt, nrvc, nxv, nyv, nzv, frsh, uim, delks)
#else
#pragma omp parallel for simd
#endif
@@ -415,7 +415,7 @@ void frfme(string data_file, string output_path) {
	    int wk_index = 0;
	    for (int jxy50 = 0; jxy50 < nkvs; jxy50++) {
		wk_index = nlmmt * jxy50;
		wk_value = tt1->wk[wk_index + j80];
		wk_value = vec_tt1_wk[wk_index + j80];
		int jy50 = jxy50 / nkv;
		int jx50 = jxy50 % nkv;
		vec_w[(nkv*jx50) + jy50] =  wk_value;