Commit 71c48282 authored by Giovanni La Mura
Browse files

Use pre-computed indices in loop and fix timers

parent 5795b9a1
Loading
Loading
Loading
Loading
+15 −13
Original line number Diff line number Diff line
@@ -279,7 +279,7 @@ void frfme(string data_file, string output_path) {
#ifdef USE_NVTX
    nvtxRangePop();
#endif
  } else { // label 16; jlfm = 1
  } else { // label 16; jlmf = 1
#ifdef USE_NVTX
    nvtxRangePush("frfme() with jlmf == 1");
#endif
@@ -539,7 +539,8 @@ void frfme(string data_file, string output_path) {
	  double *vec_vkzm = vkzm[0];
	  dcomplex *global_vec_w = new dcomplex[size_global_vec_w];
#ifdef USE_TARGET_OFFLOAD
	  frfme_duration = t_start - chrono::high_resolution_clock::now();
	  elapsed = t_start - chrono::high_resolution_clock::now();
	  frfme_duration = elapsed;
	  t_start = chrono::high_resolution_clock::now();
	  message = "INFO: Mapping data to device.\n";
	  logger.log(message);
@@ -706,11 +707,12 @@ void frfme(string data_file, string output_path) {
#ifdef USE_NVTX
  nvtxRangePop();
#endif
#ifndef USE_TARGET_OFFLOAD
  elapsed = chrono::high_resolution_clock::now() - t_start;
#ifdef USE_TARGET_OFFLOAD
  frfme_duration += elapsed;
#else
  frfme_duration = elapsed;
#else
  elapsed = chrono::high_resolution_clock::now() - t_end;
  frfme_duration += elapsed;
#endif
  message = "INFO: FRFME took " + to_string(frfme_duration.count()) + "s.\n";
  logger.log(message);
@@ -753,10 +755,10 @@ void offload_loop(
  double *_yv, const int nyv, double *_zv, const int nzv, double *vec_vkzm, const int jlmf, const int jlml,
  const int nkv, const int nlmmt, double delks, double frsh
) {
  int nrvc = nxv * nyv * nzv;
  int nvtot = nxv * nyv * nzv;
  int nkvs = nkv * nkv;
  int nkvmo = nkv - 1;
  int nkvvmo = (nkv - 1) * nkv;
  int nkvmo = nkvmo;
  int nkvvmo = nkvmo * nkv;
  int nvxy = nxv * nyv;
  dcomplex cc0 = 0.0 + I * 0.0;
  dcomplex uim = 0.0 + I * 1.0;
@@ -764,7 +766,7 @@ void offload_loop(
  for (int j80 = jlmf - 1; j80 < jlml; j80++) {
    int j80_index = j80 - jlmf + 1;
    dcomplex *vec_w = global_vec_w + nkvs * j80_index;
    for (int ixyz = 0; ixyz < nrvc; ixyz++) {
    for (int ixyz = 0; ixyz < nvtot; ixyz++) {
      int iz75 = ixyz / nvxy;
      int iy70 = (ixyz % nvxy) / nxv;
      int ix65 = ixyz % nxv;
@@ -779,7 +781,7 @@ void offload_loop(
	int jx55 = jy60x55 % nkv;
	int w_index = (jx55 * nkv) + jy60;
	double vky = vkv[jy60];
	double factor = (jy60 == 0 || jy60 == nkv - 1) ? 0.5 : 1.0;
	double factor = (jy60 == 0 || jy60 == nkvmo) ? 0.5 : 1.0;
	if (jx55 == 0) {
	  // jx55 = 0: phasf
	  double vkx = vkv[nkvmo];
@@ -790,9 +792,9 @@ void offload_loop(
	  term *= factor;
	  rsumy += (real(term));
	  isumy += (imag(term));
	} else if (jx55 == nkv - 1) {
	} else if (jx55 == nkvmo) {
	  // jx55 = nkv - 1: phasl
	  double vkx = vkv[nkv - 1];
	  double vkx = vkv[nkvmo];
	  double vkz = vec_vkzm[nkvvmo + jy60];
	  double angle = vkx * x + vky * y + vkz * z;
	  dcomplex phasl = cos(angle) + uim * sin(angle);
@@ -813,7 +815,7 @@ void offload_loop(
	}
      } // jy60x55 loop
      dcomplex sumy = rsumy + uim * isumy;
      vec_wsum[(j80_index * nrvc) + ixyz] = sumy * delks;
      vec_wsum[(j80_index * nvtot) + ixyz] = sumy * delks;
    } // ixyz loop
  } // j80 loop
}