Loading src/trapping/cfrfme.cpp +15 −13 Original line number Diff line number Diff line Loading @@ -279,7 +279,7 @@ void frfme(string data_file, string output_path) { #ifdef USE_NVTX nvtxRangePop(); #endif } else { // label 16; jlfm = 1 } else { // label 16; jlmf = 1 #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf == 1"); #endif Loading Loading @@ -539,7 +539,8 @@ void frfme(string data_file, string output_path) { double *vec_vkzm = vkzm[0]; dcomplex *global_vec_w = new dcomplex[size_global_vec_w]; #ifdef USE_TARGET_OFFLOAD frfme_duration = t_start - chrono::high_resolution_clock::now(); elapsed = t_start - chrono::high_resolution_clock::now(); frfme_duration = elapsed; t_start = chrono::high_resolution_clock::now(); message = "INFO: Mapping data to device.\n"; logger.log(message); Loading Loading @@ -706,11 +707,12 @@ void frfme(string data_file, string output_path) { #ifdef USE_NVTX nvtxRangePop(); #endif #ifndef USE_TARGET_OFFLOAD elapsed = chrono::high_resolution_clock::now() - t_start; #ifdef USE_TARGET_OFFLOAD frfme_duration += elapsed; #else frfme_duration = elapsed; #else elapsed = chrono::high_resolution_clock::now() - t_end; frfme_duration += elapsed; #endif message = "INFO: FRFME took " + to_string(frfme_duration.count()) + "s.\n"; logger.log(message); Loading Loading @@ -753,10 +755,10 @@ void offload_loop( double *_yv, const int nyv, double *_zv, const int nzv, double *vec_vkzm, const int jlmf, const int jlml, const int nkv, const int nlmmt, double delks, double frsh ) { int nrvc = nxv * nyv * nzv; int nvtot = nxv * nyv * nzv; int nkvs = nkv * nkv; int nkvmo = nkv - 1; int nkvvmo = (nkv - 1) * nkv; int nkvmo = nkvmo; int nkvvmo = nkvmo * nkv; int nvxy = nxv * nyv; dcomplex cc0 = 0.0 + I * 0.0; dcomplex uim = 0.0 + I * 1.0; Loading @@ -764,7 +766,7 @@ void offload_loop( for (int j80 = jlmf - 1; j80 < jlml; j80++) { int j80_index = j80 - jlmf + 1; dcomplex *vec_w = global_vec_w + nkvs * j80_index; for (int ixyz = 0; ixyz < nrvc; ixyz++) { for (int ixyz = 0; ixyz < nvtot; ixyz++) { int iz75 = ixyz / nvxy; int iy70 = (ixyz % nvxy) / nxv; int ix65 = ixyz % nxv; Loading @@ -779,7 +781,7 @@ void offload_loop( int jx55 = jy60x55 % nkv; int w_index = (jx55 * nkv) + jy60; double vky = vkv[jy60]; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; double factor = (jy60 == 0 || jy60 == nkvmo) ? 0.5 : 1.0; if (jx55 == 0) { // jx55 = 0: phasf double vkx = vkv[nkvmo]; Loading @@ -790,9 +792,9 @@ void offload_loop( term *= factor; rsumy += (real(term)); isumy += (imag(term)); } else if (jx55 == nkv - 1) { } else if (jx55 == nkvmo) { // jx55 = nkv - 1: phasl double vkx = vkv[nkv - 1]; double vkx = vkv[nkvmo]; double vkz = vec_vkzm[nkvvmo + jy60]; double angle = vkx * x + vky * y + vkz * z; dcomplex phasl = cos(angle) + uim * sin(angle); Loading @@ -813,7 +815,7 @@ void offload_loop( } } // jy60x55 loop dcomplex sumy = rsumy + uim * isumy; vec_wsum[(j80_index * nrvc) + ixyz] = sumy * delks; vec_wsum[(j80_index * nvtot) + ixyz] = sumy * delks; } // ixyz loop } // j80 loop } Loading Loading
src/trapping/cfrfme.cpp +15 −13 Original line number Diff line number Diff line Loading @@ -279,7 +279,7 @@ void frfme(string data_file, string output_path) { #ifdef USE_NVTX nvtxRangePop(); #endif } else { // label 16; jlfm = 1 } else { // label 16; jlmf = 1 #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf == 1"); #endif Loading Loading @@ -539,7 +539,8 @@ void frfme(string data_file, string output_path) { double *vec_vkzm = vkzm[0]; dcomplex *global_vec_w = new dcomplex[size_global_vec_w]; #ifdef USE_TARGET_OFFLOAD frfme_duration = t_start - chrono::high_resolution_clock::now(); elapsed = t_start - chrono::high_resolution_clock::now(); frfme_duration = elapsed; t_start = chrono::high_resolution_clock::now(); message = "INFO: Mapping data to device.\n"; logger.log(message); Loading Loading @@ -706,11 +707,12 @@ void frfme(string data_file, string output_path) { #ifdef USE_NVTX nvtxRangePop(); #endif #ifndef USE_TARGET_OFFLOAD elapsed = chrono::high_resolution_clock::now() - t_start; #ifdef USE_TARGET_OFFLOAD frfme_duration += elapsed; #else frfme_duration = elapsed; #else elapsed = chrono::high_resolution_clock::now() - t_end; frfme_duration += elapsed; #endif message = "INFO: FRFME took " + to_string(frfme_duration.count()) + "s.\n"; logger.log(message); Loading Loading @@ -753,10 +755,10 @@ void offload_loop( double *_yv, const int nyv, double *_zv, const int nzv, double *vec_vkzm, const int jlmf, const int jlml, const int nkv, const int nlmmt, double delks, double frsh ) { int nrvc = nxv * nyv * nzv; int nvtot = nxv * nyv * nzv; int nkvs = nkv * nkv; int nkvmo = nkv - 1; int nkvvmo = (nkv - 1) * nkv; int nkvmo = nkvmo; int nkvvmo = nkvmo * nkv; int nvxy = nxv * nyv; dcomplex cc0 = 0.0 + I * 0.0; dcomplex uim = 0.0 + I * 1.0; Loading @@ -764,7 +766,7 @@ void offload_loop( for (int j80 = jlmf - 1; j80 < jlml; j80++) { int j80_index = j80 - jlmf + 1; dcomplex *vec_w = global_vec_w + nkvs * j80_index; for (int ixyz = 0; ixyz < nrvc; ixyz++) { for (int ixyz = 0; ixyz < nvtot; ixyz++) { int iz75 = ixyz / nvxy; int iy70 = (ixyz % nvxy) / nxv; int ix65 = ixyz % nxv; Loading @@ -779,7 +781,7 @@ void offload_loop( int jx55 = jy60x55 % nkv; int w_index = (jx55 * nkv) + jy60; double vky = vkv[jy60]; double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0; double factor = (jy60 == 0 || jy60 == nkvmo) ? 0.5 : 1.0; if (jx55 == 0) { // jx55 = 0: phasf double vkx = vkv[nkvmo]; Loading @@ -790,9 +792,9 @@ void offload_loop( term *= factor; rsumy += (real(term)); isumy += (imag(term)); } else if (jx55 == nkv - 1) { } else if (jx55 == nkvmo) { // jx55 = nkv - 1: phasl double vkx = vkv[nkv - 1]; double vkx = vkv[nkvmo]; double vkz = vec_vkzm[nkvvmo + jy60]; double angle = vkx * x + vky * y + vkz * z; dcomplex phasl = cos(angle) + uim * sin(angle); Loading @@ -813,7 +815,7 @@ void offload_loop( } } // jy60x55 loop dcomplex sumy = rsumy + uim * isumy; vec_wsum[(j80_index * nrvc) + ixyz] = sumy * delks; vec_wsum[(j80_index * nvtot) + ixyz] = sumy * delks; } // ixyz loop } // j80 loop } Loading