Commit 5629fbb4 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Stop writing the c_TPPOAN file in np_cluster

parent 9b0ea781
Loading
Loading
Loading
Loading
+4 −209
Original line number | Diff line number | Diff line
@@ -117,9 +117,8 @@ using namespace std;
 *  \param cid: `ClusterIterationData *` Pointer to a `ClusterIterationData` object.
 *  \param oi: `ClusterOutputInfo *` Pointer to a `ClusterOutputInfo` object.
 *  \param output_path: `const string &` Path to the output directory.
 *  \param vtppoanp: `VirtualBinaryFile *` Pointer to a `VirtualBinaryFile` object.
 */
int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, ClusterIterationData *cid, ClusterOutputInfo *oi, const string& output_path, VirtualBinaryFile *vtppoanp);
int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, ClusterIterationData *cid, ClusterOutputInfo *oi, const string& output_path);

/*! \brief C++ implementation of CLU
 *
@@ -355,8 +354,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
      double exri = sqrt(exdc);

      // Create empty virtual binary file
      VirtualBinaryFile *vtppoanp = new VirtualBinaryFile();
      string tppoan_name = output_path + "/c_TPPOAN";
#ifdef USE_MAGMA
      logger->log("INFO: using MAGMA calls.\n", LOG_INFO);
#elif defined USE_CUBLAS
@@ -375,17 +372,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
      int nph = p_scattering_angles->nph;
      int nphs = p_scattering_angles->nphs;

      //========================
      // write a block of info to virtual binary file
      //========================
      vtppoanp->append_line(VirtualBinaryLine(iavm));
      vtppoanp->append_line(VirtualBinaryLine(isam));
      vtppoanp->append_line(VirtualBinaryLine(inpol));
      vtppoanp->append_line(VirtualBinaryLine(nxi));
      vtppoanp->append_line(VirtualBinaryLine(nth));
      vtppoanp->append_line(VirtualBinaryLine(nph));
      vtppoanp->append_line(VirtualBinaryLine(nths));
      vtppoanp->append_line(VirtualBinaryLine(nphs));
      if (sconf->idfc < 0) {
	cid->vk = cid->xip * cid->wn;
	p_output->vec_vk[0] = cid->vk;
@@ -394,13 +380,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
      int jxi488;
      int jer = 0;

      //==================================================
      // do the first outputs here, so that I open here the new files, afterwards I only append
      //==================================================
      // How should we handle this, when first iteration is not treated specially anymore? This should be ok, just write what was put in vtppoanp on initialisation, even if no actual calc was done yet. This creates the file nonetheless, 
      vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
      delete vtppoanp;

      // here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used
#ifdef MPI_VERSION
      if (mpidata->mpirunning) {
@@ -418,7 +397,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
      int myMPIblock = ompnumthreads;
      // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later
      ClusterOutputInfo **p_outarray = NULL;
      VirtualBinaryFile **vtppoanarray = NULL;

#ifdef USE_NVTX
      nvtxRangePush("Parallel loop");
@@ -441,7 +419,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	if (myompthread == 0) {
	  // Initialise some shared variables only on thread 0
	  p_outarray = new ClusterOutputInfo*[ompnumthreads];
	  vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
	  myMPIblock = ompnumthreads;
	  myMPIstride = myMPIblock;
	}
@@ -470,7 +447,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	// To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
	ClusterIterationData *cid_2 = NULL;
	ClusterOutputInfo *p_output_2 = NULL;
	VirtualBinaryFile *vtppoanp_2 = NULL;
	// for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
	if (myompthread == 0) {
	  cid_2 = cid;
@@ -494,9 +470,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
#pragma omp barrier
	  int myjxi488 = ixi488+myompthread;
	  // each thread opens new virtual files and stores their pointers in the shared array
	  vtppoanp_2 = new VirtualBinaryFile();
	  // each thread puts a copy of the pointers to its virtual files in the shared arrays
	  vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier

	  // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
@@ -506,7 +479,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
	      p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	      p_outarray[myompthread] = p_output_2;
	    }
	    int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	    int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path);
	  } else {
	    if (myompthread > 0) {
	      // If there is no input for this thread, mark to skip.
@@ -525,8 +498,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	      p_outarray[0]->insert(*(p_outarray[ti]));
	      delete p_outarray[ti];
	      p_outarray[ti] = NULL;
	      vtppoanarray[0]->append(*(vtppoanarray[ti]));
	      delete vtppoanarray[ti];
	    }
	  }
#pragma omp barrier
@@ -541,8 +512,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	    // if this is the very first time, we should actually use
	    // ->write_to_disk, not ->append_to_disk
	    // ******************************************************
	    vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN");
	    delete vtppoanarray[0];

#ifdef MPI_VERSION
	    if (mpidata->mpirunning) {
@@ -557,10 +526,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
		// delete p_output;
		
		// get the data from process rr, creating a new virtual binary file
		VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr);
		// append to disk and delete virtual binary file
		vtppoanp->append_to_disk(output_path + "/c_TPPOAN");
		delete vtppoanp;
		int test = MPI_Barrier(MPI_COMM_WORLD);
	      }
	    }
@@ -577,7 +542,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
#pragma omp barrier
	if (myompthread == 0) {
	  delete[] p_outarray;
	  delete[] vtppoanarray;
	}
	{
	  string message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
@@ -631,7 +595,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
    int ompnumthreads = 1;
    ClusterOutputInfo **p_outarray = NULL;
    VirtualBinaryFile **vtppoanarray = NULL;
    int myjxi488startoffset;
    int myMPIstride = ompnumthreads;
    int myMPIblock = ompnumthreads;
@@ -654,13 +617,11 @@ void cluster(const string& config_file, const string& data_file, const string& o
	MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
	// allocate virtual files for each thread
	p_outarray = new ClusterOutputInfo*[ompnumthreads];
	vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
      }
#pragma omp barrier
      // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
      ClusterIterationData *cid_2 = NULL;
      ClusterOutputInfo *p_output_2 = NULL;
      VirtualBinaryFile *vtppoanp_2 = NULL;
      // PLACEHOLDER
      // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
      if (myompthread == 0) {
@@ -678,9 +639,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
	// each thread opens new virtual files and stores their pointers in the shared array
	vtppoanp_2 = new VirtualBinaryFile();
	// each thread puts a copy of the pointers to its virtual files in the shared arrays
	vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier
	if (myompthread==0) logger->log("Syncing OpenMP threads and starting one iteration block on wavelengths\n");
	// ok, now I can actually start the parallel calculations
@@ -698,7 +656,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
	    p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, iterstodo);
	    p_outarray[0] = p_output_2;
	  }
	  int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	  int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path);
	} else {
	  p_outarray[myompthread] = new ClusterOutputInfo(1);
	}
@@ -710,8 +668,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	    p_outarray[0]->insert(*(p_outarray[ti]));
	    delete p_outarray[ti];
	    p_outarray[ti] = NULL;
	    vtppoanarray[0]->append(*(vtppoanarray[ti]));
	    delete vtppoanarray[ti];
	  }
	  // thread 0 sends the collected virtualfiles to thread 0 of MPI process 0, then deletes them
	  for (int rr=1; rr<mpidata->nprocs; rr++) {
@@ -719,8 +675,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	      p_outarray[0]->mpisend(mpidata);
	      delete p_outarray[0];
	      p_outarray[0] = NULL;
	      vtppoanarray[0]->mpisend(mpidata);
	      delete vtppoanarray[0];
	    }
	    int test = MPI_Barrier(MPI_COMM_WORLD);
	  }
@@ -731,7 +685,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
#pragma omp barrier
      if (myompthread == 0) {
	delete[] p_outarray;
	delete[] vtppoanarray;
      }
      delete cid_2;

@@ -753,7 +706,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
int cluster_jxi488_cycle(
  int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf,
  ScatteringAngles *sa, ClusterIterationData *cid, ClusterOutputInfo *output,
  const string& output_path, VirtualBinaryFile *vtppoanp
  const string& output_path
) {
  int nxi = sconf->number_of_scales;
  const dcomplex cc0 = 0.0 + I * 0.0;
@@ -919,15 +872,11 @@ int cluster_jxi488_cycle(
  outam0->write_to_disk(outam0_name);
  delete outam0;
#endif // DEBUG_AM
#ifdef USE_TARGET_OFFLOAD
  if (rs.use_offload) {
    cms_gpu(cid->am, cid->c1);
  } else {
    cms(cid->am, cid->c1);
  }
#else
  cms(cid->am, cid->c1);
#endif // USE_TARGET_OFFLOAD
#ifdef DEBUG_AM
  VirtualAsciiFile *outam1 = new VirtualAsciiFile();
  string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt";
@@ -1067,7 +1016,6 @@ int cluster_jxi488_cycle(
  output->vec_qschut[jindex - 1] = qschu;
  output->vec_pschut[jindex - 1] = pschu;
  output->vec_s0magt[jindex - 1] = s0mag;
  vtppoanp->append_line(VirtualBinaryLine(cid->vk));
  pcrsm0(cid->vk, exri, inpol, cid->c1);
  apcra(cid->zpv, cid->c1->le, cid->c1->am0m, inpol, sqk, cid->gapm, cid->gappm);
#ifdef USE_NVTX
@@ -1169,45 +1117,9 @@ int cluster_jxi488_cycle(
	    jw = 1;
	  }
	  // label 196
	  vtppoanp->append_line(VirtualBinaryLine(th));
	  vtppoanp->append_line(VirtualBinaryLine(ph));
	  vtppoanp->append_line(VirtualBinaryLine(ths));
	  vtppoanp->append_line(VirtualBinaryLine(phs));
	  vtppoanp->append_line(VirtualBinaryLine(cid->scan));
	  if (jaw != 0) {
	    jaw = 0;
	    mextc(cid->vk, exri, cid->c1->fsacm, cid->cextlr, cid->cext);
	    // We now have some implicit loops writing to binary
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cext[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      double value = cid->c1->scscm[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->scscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->scscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->c1->ecscm[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->ecscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->ecscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 3; i++) {
	      for (int j = 0; j < 2; j++) {
		double value = cid->gapm[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->gappm[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->gappm[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    int jlr = 2;
	    for (int ilr210 = 1; ilr210 <= 2; ilr210++) {
	      int ipol = (ilr210 % 2 == 0) ? 1 : -1;
@@ -1230,13 +1142,6 @@ int cluster_jxi488_cycle(
	      double s0magm = cabs(s0m) * cs0;
	      double rfinrm = real(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]) / real(cid->c1->tfsas);
	      double extcrm = imag(cid->c1->fsacm[ilr210 - 1][ilr210 - 1]) / imag(cid->c1->tfsas);
	      // if (inpol == 0) {
	      // sprintf(virtual_line, "   LIN %2d\n", ipol);
	      // output->append_line(virtual_line);
	      // } else { // label 206
	      // sprintf(virtual_line, "  CIRC %2d\n", ipol);
	      // output->append_line(virtual_line);
	      // }
	      // label 208
	      if (ipol == -1) {
		output->vec_scc1[jindex - 1] = scasm;
@@ -1352,101 +1257,8 @@ int cluster_jxi488_cycle(
	  mmulc(cid->c1->vint, cid->cmullr, cid->cmul);
	  if (jw != 0) {
	    jw = 0;
	    // Some implicit loops writing to binary.
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cext[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      double value = cid->c1->scsc[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->scscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->scscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->c1->ecsc[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->ecscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->ecscp[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 3; i++) {
	      for (int j = 0; j < 2; j++) {
		double value = cid->gap[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->gapp[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->gapp[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      for (int j = 0; j < 3; j++) {
		double value = cid->tqce[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->tqcpe[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->tqcpe[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      for (int j = 0; j < 3; j++) {
		double value = cid->tqcs[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->tqcps[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->tqcps[i][j]);
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 3; i++) {
	      double value = cid->u[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->up[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->un[i];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	  }
	  // label 254
	  for (int i = 0; i < 16; i++) {
	    double value = real(cid->c1->vint[i]);
	    // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	    vtppoanp->append_line(VirtualBinaryLine(value));
	    value = imag(cid->c1->vint[i]);
	    // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	    vtppoanp->append_line(VirtualBinaryLine(value));
	  }
	  for (int i = 0; i < 4; i++) {
	    for (int j = 0; j < 4; j++) {
	      double value = cid->cmul[i][j];
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	  }
	  int jlr = 2;
	  for (int ilr290 = 1; ilr290 <= 2; ilr290++) {
	    int ipol = (ilr290 % 2 == 0) ? 1 : -1;
@@ -1588,23 +1400,6 @@ int cluster_jxi488_cycle(
	  }
	  if (iavm != 0) {
	    mmulc(cid->c1->vintm, cid->cmullr, cid->cmul);
	    // Some implicit loops writing to binary.
	    for (int i = 0; i < 16; i++) {
	      double value;
	      value = real(cid->c1->vintm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->vintm[i]);
	      // tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cmul[i][j];
		// tppoan.write(reinterpret_cast<char *>(&value), sizeof(double));
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    // label 318
	    for (int i = 0; i < 4; i++) {
	      oindex = 16 * (jindex - 1) + 4 * i; // if IAVM fails, try adding directions