Loading src/libnptm/clu_subs.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -47,9 +47,9 @@ #include <omp.h> #endif #ifdef USE_TARGET_OFFLOAD #pragma omp requires unified_shared_memory #endif // #ifdef USE_TARGET_OFFLOAD // #pragma omp requires unified_shared_memory // #endif using namespace std; Loading src/trapping/cfrfme.cpp +13 −13 Original line number Diff line number Diff line Loading @@ -65,10 +65,6 @@ #include <omp.h> #endif #ifdef USE_TARGET_OFFLOAD #pragma omp requires unified_shared_memory #endif using namespace std; /*! \brief C++ implementation of FRFME Loading Loading @@ -400,9 +396,13 @@ void frfme(string data_file, string output_path) { nvtxRangePush("j80 loop"); #endif dcomplex *vec_wsum = tfrfme->wsum[0]; int size_wsum = nlmmt * nrvc; double *vec_vkzm = vkzm[0]; int size_vkzm = nkv * nkv; const dcomplex *vec_tt1_wk = tt1->wk; int size_tt1_wk = nkv * nkv * nlmmt; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd #pragma omp target teams distribute parallel for simd map(tofrom: vec_wsum[0:size_wsum]) map(to:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv], jlmf, jlml, nkv, nlmmt, nrvc, nxv, nyv, nzv, frsh, uim, delks) #else #pragma omp parallel for simd #endif Loading @@ -415,7 +415,7 @@ void frfme(string data_file, string output_path) { int wk_index = 0; for (int jxy50 = 0; jxy50 < nkvs; jxy50++) { wk_index = nlmmt * jxy50; wk_value = tt1->wk[wk_index + j80]; wk_value = vec_tt1_wk[wk_index + j80]; int jy50 = jxy50 / nkv; int jx50 = jxy50 % nkv; vec_w[(nkv*jx50) + jy50] = wk_value; Loading Loading
src/libnptm/clu_subs.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -47,9 +47,9 @@ #include <omp.h> #endif #ifdef USE_TARGET_OFFLOAD #pragma omp requires unified_shared_memory #endif // #ifdef USE_TARGET_OFFLOAD // #pragma omp requires unified_shared_memory // #endif using namespace std; Loading
src/trapping/cfrfme.cpp +13 −13 Original line number Diff line number Diff line Loading @@ -65,10 +65,6 @@ #include <omp.h> #endif #ifdef USE_TARGET_OFFLOAD #pragma omp requires unified_shared_memory #endif using namespace std; /*! \brief C++ implementation of FRFME Loading Loading @@ -400,9 +396,13 @@ void frfme(string data_file, string output_path) { nvtxRangePush("j80 loop"); #endif dcomplex *vec_wsum = tfrfme->wsum[0]; int size_wsum = nlmmt * nrvc; double *vec_vkzm = vkzm[0]; int size_vkzm = nkv * nkv; const dcomplex *vec_tt1_wk = tt1->wk; int size_tt1_wk = nkv * nkv * nlmmt; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd #pragma omp target teams distribute parallel for simd map(tofrom: vec_wsum[0:size_wsum]) map(to:vec_vkzm[0:size_vkzm], vkv[0:nkv], vec_tt1_wk[0:size_tt1_wk], _xv[0:nxv], _yv[0:nyv], _zv[0:nzv], jlmf, jlml, nkv, nlmmt, nrvc, nxv, nyv, nzv, frsh, uim, delks) #else #pragma omp parallel for simd #endif Loading @@ -415,7 +415,7 @@ void frfme(string data_file, string output_path) { int wk_index = 0; for (int jxy50 = 0; jxy50 < nkvs; jxy50++) { wk_index = nlmmt * jxy50; wk_value = tt1->wk[wk_index + j80]; wk_value = vec_tt1_wk[wk_index + j80]; int jy50 = jxy50 / nkv; int jx50 = jxy50 % nkv; vec_w[(nkv*jx50) + jy50] = wk_value; Loading