Loading src/trapping/cfrfme.cpp +55 −5 Original line number Diff line number Diff line Loading @@ -66,12 +66,26 @@ #endif #ifdef USE_TARGET_OFFLOAD void map_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ); void offload_loop( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt, double delks, double frsh ); void unmap_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ); #endif using namespace std; Loading Loading @@ -413,11 +427,27 @@ void frfme(string data_file, string output_path) { double *vec_vkzm = vkzm[0]; dcomplex *global_vec_w = new dcomplex[size_global_vec_w]; #ifdef USE_TARGET_OFFLOAD message = "INFO: Mapping data to device.\n"; logger.log(message); map_data( vec_wsum, size_vec_wsum, global_vec_w, size_global_vec_w, vec_tt1_wk, size_vec_tt1_wk, vkv, _xv, nxv, _yv, nyv, _zv, nzv, vec_vkzm, jlmf, jlml, nkv, nlmmt ); message = "INFO: computing loop.\n"; logger.log(message); offload_loop( vec_wsum, size_vec_wsum, global_vec_w, size_global_vec_w, vec_tt1_wk, size_vec_tt1_wk, vkv, _xv, nxv, _yv, nyv, _zv, nzv, vec_vkzm, jlmf, jlml, nkv, nlmmt, delks, frsh ); message = "INFO: cleaning device memory.\n"; logger.log(message); unmap_data( vec_wsum, size_vec_wsum, global_vec_w, size_global_vec_w, vec_tt1_wk, size_vec_tt1_wk, vkv, _xv, nxv, _yv, nyv, _zv, nzv, vec_vkzm, jlmf, jlml, nkv, nlmmt ); #else 
#pragma omp parallel for for (int j80 = jlmf - 1; j80 < jlml; j80++) { Loading Loading @@ -522,6 +552,18 @@ void frfme(string data_file, string output_path) { } #ifdef USE_TARGET_OFFLOAD void map_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ) { #pragma omp target enter data map(to: vec_wsum[0:size_vec_wsum]) \ map(alloc: global_vec_w[0:size_global_vec_w]) \ map(to: vec_tt1_wk[0:size_vec_tt1_wk]) \ map(to: _xv[0:nxv], _yv[0:nyv], _zv[0:nzv]) } void offload_loop( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, Loading @@ -532,11 +574,7 @@ void offload_loop( int nkvs = nkv * nkv; const dcomplex cc0 = 0.0 + I * 0.0; const dcomplex uim = 0.0 + I * 1.0; #pragma omp target teams distribute parallel for \ map(tofrom: vec_wsum[0:size_vec_wsum]) \ map(alloc: global_vec_w[0:size_global_vec_w]) \ map(to: vec_tt1_wk[0:size_vec_tt1_wk]) \ map(to: _xv[0:nxv], _yv[0:nyv], _zv[0:nzv]) #pragma omp target teams distribute parallel for for (int j80 = jlmf - 1; j80 < jlml; j80++) { dcomplex *vec_w = global_vec_w + nkvs * (j80 - jlmf + 1); #pragma omp parallel for simd Loading Loading @@ -583,4 +621,16 @@ void offload_loop( } // ixyz loop } // j80 loop } void unmap_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ) { #pragma omp target exit data map(from: vec_wsum[0:size_vec_wsum]) \ map(delete: global_vec_w[0:size_global_vec_w]) \ map(delete: vec_tt1_wk[0:size_vec_tt1_wk]) \ map(delete: _xv[0:nxv], _yv[0:nyv], 
_zv[0:nzv]) } #endif // USE_TARGET_OFFLOAD Loading
src/trapping/cfrfme.cpp +55 −5 Original line number Diff line number Diff line Loading @@ -66,12 +66,26 @@ #endif #ifdef USE_TARGET_OFFLOAD void map_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ); void offload_loop( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt, double delks, double frsh ); void unmap_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ); #endif using namespace std; Loading Loading @@ -413,11 +427,27 @@ void frfme(string data_file, string output_path) { double *vec_vkzm = vkzm[0]; dcomplex *global_vec_w = new dcomplex[size_global_vec_w]; #ifdef USE_TARGET_OFFLOAD message = "INFO: Mapping data to device.\n"; logger.log(message); map_data( vec_wsum, size_vec_wsum, global_vec_w, size_global_vec_w, vec_tt1_wk, size_vec_tt1_wk, vkv, _xv, nxv, _yv, nyv, _zv, nzv, vec_vkzm, jlmf, jlml, nkv, nlmmt ); message = "INFO: computing loop.\n"; logger.log(message); offload_loop( vec_wsum, size_vec_wsum, global_vec_w, size_global_vec_w, vec_tt1_wk, size_vec_tt1_wk, vkv, _xv, nxv, _yv, nyv, _zv, nzv, vec_vkzm, jlmf, jlml, nkv, nlmmt, delks, frsh ); message = "INFO: cleaning device memory.\n"; logger.log(message); unmap_data( vec_wsum, size_vec_wsum, global_vec_w, size_global_vec_w, vec_tt1_wk, size_vec_tt1_wk, vkv, _xv, nxv, _yv, nyv, _zv, nzv, vec_vkzm, jlmf, jlml, nkv, nlmmt ); #else #pragma omp 
parallel for for (int j80 = jlmf - 1; j80 < jlml; j80++) { Loading Loading @@ -522,6 +552,18 @@ void frfme(string data_file, string output_path) { } #ifdef USE_TARGET_OFFLOAD void map_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ) { #pragma omp target enter data map(to: vec_wsum[0:size_vec_wsum]) \ map(alloc: global_vec_w[0:size_global_vec_w]) \ map(to: vec_tt1_wk[0:size_vec_tt1_wk]) \ map(to: _xv[0:nxv], _yv[0:nyv], _zv[0:nzv]) } void offload_loop( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, Loading @@ -532,11 +574,7 @@ void offload_loop( int nkvs = nkv * nkv; const dcomplex cc0 = 0.0 + I * 0.0; const dcomplex uim = 0.0 + I * 1.0; #pragma omp target teams distribute parallel for \ map(tofrom: vec_wsum[0:size_vec_wsum]) \ map(alloc: global_vec_w[0:size_global_vec_w]) \ map(to: vec_tt1_wk[0:size_vec_tt1_wk]) \ map(to: _xv[0:nxv], _yv[0:nyv], _zv[0:nzv]) #pragma omp target teams distribute parallel for for (int j80 = jlmf - 1; j80 < jlml; j80++) { dcomplex *vec_w = global_vec_w + nkvs * (j80 - jlmf + 1); #pragma omp parallel for simd Loading Loading @@ -583,4 +621,16 @@ void offload_loop( } // ixyz loop } // j80 loop } void unmap_data( dcomplex *vec_wsum, int size_vec_wsum, dcomplex *global_vec_w, int size_global_vec_w, const dcomplex *vec_tt1_wk, int size_vec_tt1_wk, double *vkv, double *_xv, int nxv, double *_yv, int nyv, double *_zv, int nzv, double *vec_vkzm, int jlmf, int jlml, int nkv, int nlmmt ) { #pragma omp target exit data map(from: vec_wsum[0:size_vec_wsum]) \ map(delete: global_vec_w[0:size_global_vec_w]) \ map(delete: vec_tt1_wk[0:size_vec_tt1_wk]) \ map(delete: _xv[0:nxv], _yv[0:nyv], _zv[0:nzv]) } 
#endif // USE_TARGET_OFFLOAD