Commit b609fec5 authored by Giovanni La Mura
Browse files

Adapt ClusterOutputInfo to work with OpenMP

parent 8cc197db
Loading
Loading
Loading
Loading
+16 −15
Original line number Diff line number Diff line
@@ -419,12 +419,8 @@ void cluster(const string& config_file, const string& data_file, const string& o
	  // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	  int myjxi488 = ixi488+myompthread;
	  // each thread opens new virtual files and stores their pointers in the shared array
	  if (myompthread > 0) {
	    // UPDATE: non-0 threads need to allocate memory for one scale at a time.
	    p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	  }
	  vtppoanp_2 = new VirtualBinaryFile();
	  // each thread opens new virtual files and stores their pointers in the shared array
	  // each thread puts a copy of the pointers to its virtual files in the shared arrays
	  p_outarray[myompthread] = p_output_2;
	  vtppoanarray[myompthread] = vtppoanp_2;
@@ -432,6 +428,10 @@ void cluster(const string& config_file, const string& data_file, const string& o

	  // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
	  if (myjxi488 <= cid_2->number_of_scales) {
	    if (myompthread > 0) {
	      // UPDATE: non-0 threads need to allocate memory for one scale at a time.
	      p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	    }
	    int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	  }
#pragma omp barrier
@@ -443,8 +443,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
	  // threads different from 0 append their virtual files to the one of thread 0, and delete them
	  if (myompthread == 0) {
	    for (int ti=1; ti<ompnumthreads; ti++) {
	      if (p_outarray[ti] != NULL) {
		p_outarray[0]->insert(*(p_outarray[ti]));
		delete p_outarray[ti];
	      }
	      vtppoanarray[0]->append(*(vtppoanarray[ti]));
	      delete vtppoanarray[ti];
	    }
@@ -481,10 +483,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	      }
	    }
#endif
	    // ClusterOutputInfo : the VirtualAsciiFile instances were appended to
	    // disk here. This is no longer the case.
	    // p_outarray[0]->write(output_path + "/c_OCLU", "LEGACY");
	    // delete p_outarray[0];
	  }
	  // end block writing to disk
#ifdef USE_NVTX
@@ -599,8 +597,6 @@ void cluster(const string& config_file, const string& data_file, const string& o
	// output of all threads.
	if (myompthread == 0)
	  p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads);
	else
	  p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	// each thread opens new virtual files and stores their pointers in the shared array
	vtppoanp_2 = new VirtualBinaryFile();
	// each thread puts a copy of the pointers to its virtual files in the shared arrays
@@ -611,6 +607,9 @@ void cluster(const string& config_file, const string& data_file, const string& o
	// ok, now I can actually start the parallel calculations
	// each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
	if (myjxi488 <= cid_2->number_of_scales) {
	  if (myompthread > 0) {
	    p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	  }
	  int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	} // close the OMP parallel for loop

@@ -618,8 +617,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
	// threads different from 0 append their virtual files to the one of thread 0, and delete them
	if (myompthread == 0) {
	  for (int ti=1; ti<ompnumthreads; ti++) {
	    if (p_outarray[ti] != NULL) {
	      p_outarray[0]->insert(*(p_outarray[ti]));
	      delete p_outarray[ti];
	    }
	    vtppoanarray[0]->append(*(vtppoanarray[ti]));
	    delete vtppoanarray[ti];
	  }
+2 −1
Original line number Diff line number Diff line
@@ -1363,7 +1363,8 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorClu
    ecscm[ci] = rhs.ecscm[ci];
  }
  v3j0 = new double[_nv3j];
  for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j];
  // for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j]; REPORT: bug - every element was read from the fixed index _nv3j instead of vj.
  for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[vj];
  ind3j = new int*[_lm + 1];
  for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii);
  rac3j = new double[_lmtpo];
+18 −18
Original line number Diff line number Diff line
@@ -630,14 +630,14 @@ int ClusterOutputInfo::insert(const ClusterOutputInfo &rhs) {
    memcpy(vec_dir_sat21 + offset, rhs.vec_dir_sat21, chunk_size * sizeof(dcomplex));
    memcpy(vec_dir_sat12 + offset, rhs.vec_dir_sat12, chunk_size * sizeof(dcomplex));
    memcpy(vec_dir_sat22 + offset, rhs.vec_dir_sat22, chunk_size * sizeof(dcomplex));
    memcpy(vec_dir_scc1 + offset, rhs.vec_scc1, chunk_size * sizeof(double));
    memcpy(vec_dir_scc2 + offset, rhs.vec_scc2, chunk_size * sizeof(double));
    memcpy(vec_dir_abc1 + offset, rhs.vec_abc1, chunk_size * sizeof(double));
    memcpy(vec_dir_abc2 + offset, rhs.vec_abc2, chunk_size * sizeof(double));
    memcpy(vec_dir_exc1 + offset, rhs.vec_exc1, chunk_size * sizeof(double));
    memcpy(vec_dir_exc2 + offset, rhs.vec_exc2, chunk_size * sizeof(double));
    memcpy(vec_dir_albedc1 + offset, rhs.vec_albedc1, chunk_size * sizeof(double));
    memcpy(vec_dir_albedc2 + offset, rhs.vec_albedc2, chunk_size * sizeof(double));
    memcpy(vec_dir_scc1 + offset, rhs.vec_dir_scc1, chunk_size * sizeof(double));
    memcpy(vec_dir_scc2 + offset, rhs.vec_dir_scc2, chunk_size * sizeof(double));
    memcpy(vec_dir_abc1 + offset, rhs.vec_dir_abc1, chunk_size * sizeof(double));
    memcpy(vec_dir_abc2 + offset, rhs.vec_dir_abc2, chunk_size * sizeof(double));
    memcpy(vec_dir_exc1 + offset, rhs.vec_dir_exc1, chunk_size * sizeof(double));
    memcpy(vec_dir_exc2 + offset, rhs.vec_dir_exc2, chunk_size * sizeof(double));
    memcpy(vec_dir_albedc1 + offset, rhs.vec_dir_albedc1, chunk_size * sizeof(double));
    memcpy(vec_dir_albedc2 + offset, rhs.vec_dir_albedc2, chunk_size * sizeof(double));
    memcpy(vec_dir_qscc1 + offset, rhs.vec_dir_qscc1, chunk_size * sizeof(double));
    memcpy(vec_dir_qscc2 + offset, rhs.vec_dir_qscc2, chunk_size * sizeof(double));
    memcpy(vec_dir_qabc1 + offset, rhs.vec_dir_qabc1, chunk_size * sizeof(double));
@@ -658,16 +658,16 @@ int ClusterOutputInfo::insert(const ClusterOutputInfo &rhs) {
    memcpy(vec_dir_sac21 + offset, rhs.vec_dir_sac21, chunk_size * sizeof(dcomplex));
    memcpy(vec_dir_sac12 + offset, rhs.vec_dir_sac12, chunk_size * sizeof(dcomplex));
    memcpy(vec_dir_sac22 + offset, rhs.vec_dir_sac22, chunk_size * sizeof(dcomplex));
    memcpy(vec_dir_qschuc1 + offset, rhs.vec_qschuc1, chunk_size * sizeof(double));
    memcpy(vec_dir_qschuc2 + offset, rhs.vec_qschuc2, chunk_size * sizeof(double));
    memcpy(vec_dir_pschuc1 + offset, rhs.vec_pschuc1, chunk_size * sizeof(double));
    memcpy(vec_dir_pschuc2 + offset, rhs.vec_pschuc2, chunk_size * sizeof(double));
    memcpy(vec_dir_s0magc1 + offset, rhs.vec_s0magc1, chunk_size * sizeof(double));
    memcpy(vec_dir_s0magc2 + offset, rhs.vec_s0magc2, chunk_size * sizeof(double));
    memcpy(vec_dir_cosavc1 + offset, rhs.vec_cosavc1, chunk_size * sizeof(double));
    memcpy(vec_dir_cosavc2 + offset, rhs.vec_cosavc2, chunk_size * sizeof(double));
    memcpy(vec_dir_raprc1 + offset, rhs.vec_raprc1, chunk_size * sizeof(double));
    memcpy(vec_dir_raprc2 + offset, rhs.vec_raprc2, chunk_size * sizeof(double));
    memcpy(vec_dir_qschuc1 + offset, rhs.vec_dir_qschuc1, chunk_size * sizeof(double));
    memcpy(vec_dir_qschuc2 + offset, rhs.vec_dir_qschuc2, chunk_size * sizeof(double));
    memcpy(vec_dir_pschuc1 + offset, rhs.vec_dir_pschuc1, chunk_size * sizeof(double));
    memcpy(vec_dir_pschuc2 + offset, rhs.vec_dir_pschuc2, chunk_size * sizeof(double));
    memcpy(vec_dir_s0magc1 + offset, rhs.vec_dir_s0magc1, chunk_size * sizeof(double));
    memcpy(vec_dir_s0magc2 + offset, rhs.vec_dir_s0magc2, chunk_size * sizeof(double));
    memcpy(vec_dir_cosavc1 + offset, rhs.vec_dir_cosavc1, chunk_size * sizeof(double));
    memcpy(vec_dir_cosavc2 + offset, rhs.vec_dir_cosavc2, chunk_size * sizeof(double));
    memcpy(vec_dir_raprc1 + offset, rhs.vec_dir_raprc1, chunk_size * sizeof(double));
    memcpy(vec_dir_raprc2 + offset, rhs.vec_dir_raprc2, chunk_size * sizeof(double));
    memcpy(vec_dir_flc1 + offset, rhs.vec_dir_flc1, chunk_size * sizeof(double));
    memcpy(vec_dir_flc2 + offset, rhs.vec_dir_flc2, chunk_size * sizeof(double));
    memcpy(vec_dir_frc1 + offset, rhs.vec_dir_frc1, chunk_size * sizeof(double));