Commit 9d8d0024 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Initialize thread-local outputs only when actually needed

parent 2028cb6f
Loading
Loading
Loading
Loading
+11 −6
Original line number Diff line number Diff line
@@ -404,6 +404,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
	  cid_2 = cid;
	  // OMP thread 0 of MPI process 0 holds the pointer to the full output structure
	  p_output_2 = p_output;
	  p_outarray[0] = p_output_2;
	} else {
	  // this is not thread 0, so create fresh copies of all local variables
	  cid_2 = new ClusterIterationData(*cid);
@@ -422,7 +423,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	  vtppoanp_2 = new VirtualBinaryFile();
	  // each thread opens new virtual files and stores their pointers in the shared array
	  // each thread puts a copy of the pointers to its virtual files in the shared arrays
	  p_outarray[myompthread] = p_output_2;
	  vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier

@@ -431,6 +431,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
	    if (myompthread > 0) {
	      // UPDATE: non-0 threads need to allocate memory for one scale at a time.
	      p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	      p_outarray[myompthread] = p_output_2;
	    }
	    int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	  }
@@ -446,6 +447,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
	      if (p_outarray[ti] != NULL) {
		p_outarray[0]->insert(*(p_outarray[ti]));
		delete p_outarray[ti];
		p_outarray[ti] = NULL;
	      }
	      vtppoanarray[0]->append(*(vtppoanarray[ti]));
	      delete vtppoanarray[ti];
@@ -594,14 +596,9 @@ void cluster(const string& config_file, const string& data_file, const string& o
	// the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
	// Thread 0 of non-zero MPI processes needs to allocate memory for the
	// output of all threads.
	if (myompthread == 0)
	  p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
	// each thread opens new virtual files and stores their pointers in the shared array
	vtppoanp_2 = new VirtualBinaryFile();
	// each thread puts a copy of the pointers to its virtual files in the shared arrays
	p_outarray[myompthread] = p_output_2;
	vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier
	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
@@ -609,7 +606,14 @@ void cluster(const string& config_file, const string& data_file, const string& o
	// each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
	if (myjxi488 <= cid_2->number_of_scales) {
	  if (myompthread > 0) {
	    // UPDATE: non-0 threads need to allocate memory for one scale at a time.
	    p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	    p_outarray[myompthread] = p_output_2;
	  } else {
	    // Thread 0 of non-zero MPI processes needs to allocate memory for the
	    // output of all threads.
	    p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
	    p_outarray[0] = p_output_2;
	  }
	  int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	} // close the OMP parallel for loop
@@ -621,6 +625,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
	    if (p_outarray[ti] != NULL) {
	      p_outarray[0]->insert(*(p_outarray[ti]));
	      delete p_outarray[ti];
	      p_outarray[ti] = NULL;
	    }
	    vtppoanarray[0]->append(*(vtppoanarray[ti]));
	    delete vtppoanarray[ti];