Commit 027d8e0a authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Merge branch 'script_devel' into 'master'

Fix bug in compilation against MKL

See merge request giacomo.mulas/np_tmcode!113
parents cad36262 1f2499c3
Loading
Loading
Loading
Loading
+16 −198
Original line number Diff line number Diff line
@@ -121,9 +121,12 @@ using namespace std;
 *  \param cid: `InclusionIterationData *` Pointer to an `InclusionIterationData` object.
 *  \param output: `InclusionOutputInfo *` Pointer to an `InclusionOutputInfo` object.
 *  \param output_path: `const string &` Path to the output directory.
 *  \param vtppoanp: `VirtualBinaryFile *` Pointer to a `VirtualBinaryFile` object.
 */
int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, InclusionIterationData *cid, InclusionOutputInfo *output, const string& output_path, VirtualBinaryFile *vtppoanp);
int inclusion_jxi488_cycle(
  int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf,
  ScatteringAngles *sa, InclusionIterationData *cid, InclusionOutputInfo *output,
  const string& output_path
);

/*! \brief C++ implementation of INCLU
 *
@@ -244,12 +247,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	message = "INFO: using RBT for inversion.\n";
	logger->log(message);
      }
      // else if(gconf->invert_mode == RuntimeSettings::INV_MODE_SVD) {
      // 	//message = "INFO: using SVD for inversion.\n";
      // 	message = "ERROR: SVD inversion mode not yet implemented!\n";
      // 	logger->log(message);
      // 	exit(1);
      // }
      // Overlapping spheres test
      double tolerance = gconf->tolerance;
      if (tolerance < 0.0) {
@@ -351,9 +348,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
      double exdc = sconf->exdc;
      double exri = sqrt(exdc);

      // Create an empty bynary file
      VirtualBinaryFile *vtppoanp = new VirtualBinaryFile();
      string tppoan_name = output_path + "/c_TPPOAN";
#ifdef USE_MAGMA
      logger->log("INFO: using MAGMA calls.\n", LOG_INFO);
#elif defined USE_LAPACK
@@ -370,17 +364,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
      int nph = p_scattering_angles->nph;
      int nphs = p_scattering_angles->nphs;
      
      //========================
      // write a block of info to virtual binary file
      //========================
      vtppoanp->append_line(VirtualBinaryLine(iavm));
      vtppoanp->append_line(VirtualBinaryLine(isam));
      vtppoanp->append_line(VirtualBinaryLine(inpol));
      vtppoanp->append_line(VirtualBinaryLine(nxi));
      vtppoanp->append_line(VirtualBinaryLine(nth));
      vtppoanp->append_line(VirtualBinaryLine(nph));
      vtppoanp->append_line(VirtualBinaryLine(nths));
      vtppoanp->append_line(VirtualBinaryLine(nphs));
      if (sconf->idfc < 0) {
	cid->vk = cid->xip * cid->wn;
	p_output->vec_vk[0] = cid->vk;
@@ -389,12 +372,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
      int jxi488;
      int jer = 0;

      //==================================================
      // do the first outputs here, so that I open here the new files, afterwards I only append
      //==================================================
      vtppoanp->write_to_disk(output_path + "/c_TPPOAN");
      delete vtppoanp;
      
      // here go the calls that send data to be duplicated on other MPI processes from process 0 to others, using MPI broadcasts, but only if MPI is actually used
#ifdef MPI_VERSION
      if (mpidata->mpirunning) {
@@ -412,7 +389,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
      int myMPIblock = ompnumthreads;
      // Define here shared arrays of virtual ascii and binary files, so that thread 0 will be able to access them all later
      InclusionOutputInfo **p_outarray = NULL;
      VirtualBinaryFile **vtppoanarray = NULL;

#ifdef USE_NVTX
      nvtxRangePush("Parallel loop");
@@ -435,7 +411,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	if (myompthread == 0) {
	  // Initialise some shared variables only on thread 0
	  p_outarray = new InclusionOutputInfo*[ompnumthreads];
	  vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
	  myMPIblock = ompnumthreads;
	  myMPIstride = myMPIblock;
	}
@@ -464,7 +439,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	// To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
	InclusionIterationData *cid_2 = NULL;
	InclusionOutputInfo *p_output_2 = NULL;
	VirtualBinaryFile *vtppoanp_2 = NULL;
	// for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
	if (myompthread == 0) {
	  cid_2 = cid;
@@ -485,10 +459,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	  // the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	  int myjxi488 = ixi488+myompthread;
	  // each thread opens new virtual files and stores their pointers in the shared array
	  vtppoanp_2 = new VirtualBinaryFile();
	  // each thread puts a copy of the pointers to its virtual files in the shared arrays
	  vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier

	  // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism
@@ -498,7 +468,9 @@ void inclusion(const string& config_file, const string& data_file, const string&
	      p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, 1);
	      p_outarray[myompthread] = p_output_2;
	    }
	    int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	    int jer = inclusion_jxi488_cycle(
              myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path
            );
	  } else {
	    if (myompthread > 0) {
	      // If there is no input for this thread, set output pointer to NULL.
@@ -517,8 +489,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	      p_outarray[0]->insert(*(p_outarray[ti]));
	      delete p_outarray[ti];
	      p_outarray[ti] = NULL;
	      vtppoanarray[0]->append(*(vtppoanarray[ti]));
	      delete vtppoanarray[ti];
	    }
	  }
#pragma omp barrier
@@ -526,11 +496,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	  // Collect all virtual files on thread 0 of MPI process 0, and append them to disk
	  //==============================================
	  if (myompthread == 0) {
	    // thread 0 writes its virtual files, now including contributions from all threads, to disk, and deletes them
	    // p_outarray[0]->append_to_disk(output_path + "/c_OINCLU");
	    // delete p_outarray[0];
	    vtppoanarray[0]->append_to_disk(output_path + "/c_TPPOAN");
	    delete vtppoanarray[0];

#ifdef MPI_VERSION
	    if (mpidata->mpirunning) {
@@ -538,17 +503,7 @@ void inclusion(const string& config_file, const string& data_file, const string&
	      for (int rr=1; rr<mpidata->nprocs; rr++) {
		// get the data from process rr by receiving it in total memory structure
		p_outarray[0]->mpireceive(mpidata, rr);
		// get the data from process rr, creating a new virtual ascii file
		// VirtualAsciiFile *p_output = new VirtualAsciiFile(mpidata, rr);
		// append to disk and delete virtual ascii file
		// p_output->append_to_disk(output_path + "/c_OINCLU");
		// delete p_output;
		
		// get the data from process rr, creating a new virtual binary file
		VirtualBinaryFile *vtppoanp = new VirtualBinaryFile(mpidata, rr);
		// append to disk and delete virtual binary file
		vtppoanp->append_to_disk(output_path + "/c_TPPOAN");
		delete vtppoanp;
		
		int test = MPI_Barrier(MPI_COMM_WORLD);
	      }
	    }
@@ -565,7 +520,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
#pragma omp barrier
	if (myompthread == 0) {
	  delete[] p_outarray;
	  delete[] vtppoanarray;
	}
	{
	  string message = "INFO: Closing thread-local output files of thread " + to_string(myompthread) + " and syncing threads.\n";
@@ -619,7 +573,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
    int ompnumthreads = 1;
    InclusionOutputInfo **p_outarray = NULL;
    VirtualBinaryFile **vtppoanarray = NULL;
    int myjxi488startoffset;
    int myMPIstride = ompnumthreads;
    int myMPIblock = ompnumthreads;
@@ -642,13 +595,11 @@ void inclusion(const string& config_file, const string& data_file, const string&
	MPI_Bcast(&myMPIstride, 1, MPI_INT, 0, MPI_COMM_WORLD);
	// allocate virtual files for each thread
	p_outarray = new InclusionOutputInfo*[ompnumthreads];
	vtppoanarray = new VirtualBinaryFile*[ompnumthreads];
      }
#pragma omp barrier
      // To test parallelism, I will now start feeding this function with "clean" copies of the parameters, so that they will not be changed by previous iterations, and each one will behave as the first one. Define all (empty) variables here, so they have the correct scope, then they get different definitions depending on thread number
      InclusionIterationData *cid_2 = NULL;
      InclusionOutputInfo *p_output_2 = NULL;
      VirtualBinaryFile *vtppoanp_2 = NULL;
      // PLACEHOLDER
      // for threads other than the 0, create distinct copies of all relevant data, while for thread 0 just define new references / pointers to the original ones
      if (myompthread == 0) {
@@ -664,10 +615,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	// the parallel loop over MPI processes covers a different set of indices for each thread
#pragma omp barrier
	int myjxi488 = ixi488 + myjxi488startoffset + myompthread;
	// each thread opens new virtual files and stores their pointers in the shared array
	vtppoanp_2 = new VirtualBinaryFile();
	// each thread puts a copy of the pointers to its virtual files in the shared arrays
	vtppoanarray[myompthread] = vtppoanp_2;
#pragma omp barrier
	if (myompthread==0) logger->log("Syncing OpenMP threads and starting the loop on wavelengths\n");
	// ok, now I can actually start the parallel calculations
@@ -685,7 +632,9 @@ void inclusion(const string& config_file, const string& data_file, const string&
	    p_output_2 = new InclusionOutputInfo(sconf, gconf, mpidata, myjxi488, iterstodo);
	    p_outarray[0] = p_output_2;
	  }
	  int jer = inclusion_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2);
	  int jer = inclusion_jxi488_cycle(
            myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path
          );
	} else {
	  p_outarray[myompthread] = new InclusionOutputInfo(1);
	}
@@ -697,8 +646,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	    p_outarray[0]->insert(*(p_outarray[ti]));
	    delete p_outarray[ti];
	    p_outarray[ti] = NULL;
	    vtppoanarray[0]->append(*(vtppoanarray[ti]));
	    delete vtppoanarray[ti];
	  }
	  // thread 0 sends the collected virtualfiles to thread 0 of MPI process 0, then deletes them
	  for (int rr=1; rr<mpidata->nprocs; rr++) {
@@ -706,8 +653,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
	      p_outarray[0]->mpisend(mpidata);
	      delete p_outarray[0];
	      p_outarray[0] = NULL;
	      vtppoanarray[0]->mpisend(mpidata);
	      delete vtppoanarray[0];
	    }
	    int test = MPI_Barrier(MPI_COMM_WORLD);
	  }
@@ -718,7 +663,6 @@ void inclusion(const string& config_file, const string& data_file, const string&
#pragma omp barrier
      if (myompthread == 0) {
	delete[] p_outarray;
	delete[] vtppoanarray;
      }
      delete cid_2;

@@ -737,7 +681,11 @@ void inclusion(const string& config_file, const string& data_file, const string&
  delete logger;
}

int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf, ScatteringAngles *sa, InclusionIterationData *cid, InclusionOutputInfo *output, const string& output_path, VirtualBinaryFile *vtppoanp) {
int inclusion_jxi488_cycle(
  int jxi488, ScattererConfiguration *sconf, GeometryConfiguration *gconf,
  ScatteringAngles *sa, InclusionIterationData *cid, InclusionOutputInfo *output,
  const string& output_path
) {
  int nxi = sconf->number_of_scales;
  const dcomplex cc0 = 0.0 + I * 0.0;
  const double pi = acos(-1.0);
@@ -1001,7 +949,6 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
  double cs0 = 0.25 * cid->vk * cid->vk * cid->vk / acos(0.0);
  double csch = 2.0 * cid->vk * cid->sqsfi / cid->c1->gcs;
  double sqk = cid->vk * cid->vk * exdc;
  vtppoanp->append_line(VirtualBinaryLine(cid->vk));
  pcrsm0(cid->vk, exri, inpol, cid->c1);
  apcra(cid->zpv, cid->c1->le, cid->c1->am0m, inpol, sqk, cid->gapm, cid->gappm);
#ifdef USE_NVTX
@@ -1103,45 +1050,9 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
	    jw = 1;
	  }
	  // label 196
	  vtppoanp->append_line(VirtualBinaryLine(th));
	  vtppoanp->append_line(VirtualBinaryLine(ph));
	  vtppoanp->append_line(VirtualBinaryLine(ths));
	  vtppoanp->append_line(VirtualBinaryLine(phs));
	  vtppoanp->append_line(VirtualBinaryLine(cid->scan));
	  if (jaw != 0) {
	    jaw = 0;
	    mextc(cid->vk, exri, cid->c1->fsacm, cid->cextlr, cid->cext);
	    // We now have some implicit loops writing to binary
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cext[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      double value = cid->c1->scscm[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->scscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->scscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->c1->ecscm[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->ecscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->ecscpm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 3; i++) {
	      for (int j = 0; j < 2; j++) {
		double value = cid->gapm[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->gappm[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->gappm[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    int jlr = 2;
	    for (int ilr210 = 1; ilr210 <= 2; ilr210++) {
	      int ipol = (ilr210 % 2 == 0) ? 1 : -1;
@@ -1157,14 +1068,6 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
	      double qschum = imag(s0m) * csch;
	      double pschum = real(s0m) * csch;
	      double s0magm = cabs(s0m) * cs0;
	      // if (inpol == 0) {
	      // sprintf(virtual_line, "   LIN %2d\n", ipol);
	      // output->append_line(virtual_line);
	      // } else { // label 206
	      // sprintf(virtual_line, "  CIRC %2d\n", ipol);
	      // output->append_line(virtual_line);
	      // }
	      // label 208
	      double rapr = cid->c1->ecscm[ilr210 - 1] - cid->gapm[2][ilr210 - 1];
	      double cosav = cid->gapm[2][ilr210 - 1] / cid->c1->scscm[ilr210 - 1];
	      double fz = rapr;
@@ -1230,79 +1133,8 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
	  mmulc(cid->c1->vint, cid->cmullr, cid->cmul);
	  if (jw != 0) {
	    jw = 0;
	    // Some implicit loops writing to binary.
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cext[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      double value = cid->c1->scsc[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->scscp[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->scscp[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->c1->ecsc[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = real(cid->c1->ecscp[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->ecscp[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 3; i++) {
	      for (int j = 0; j < 2; j++) {
		double value = cid->gap[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->gapp[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->gapp[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      for (int j = 0; j < 3; j++) {
		double value = cid->tqce[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->tqcpe[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->tqcpe[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 2; i++) {
	      for (int j = 0; j < 3; j++) {
		double value = cid->tqcs[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = real(cid->tqcps[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
		value = imag(cid->tqcps[i][j]);
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    for (int i = 0; i < 3; i++) {
	      double value = cid->u[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->up[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = cid->un[i];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	  }
	  // label 254
	  for (int i = 0; i < 16; i++) {
	    double value = real(cid->c1->vint[i]);
	    vtppoanp->append_line(VirtualBinaryLine(value));
	    value = imag(cid->c1->vint[i]);
	    vtppoanp->append_line(VirtualBinaryLine(value));
	  }
	  for (int i = 0; i < 4; i++) {
	    for (int j = 0; j < 4; j++) {
	      double value = cid->cmul[i][j];
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	  }
	  oindex = (jindex - 1) * ndirs + done_dirs;
	  int jlr = 2;
	  for (int ilr290 = 1; ilr290 <= 2; ilr290++) {
@@ -1432,20 +1264,6 @@ int inclusion_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryCo
	  }
	  if (iavm != 0) {
	    mmulc(cid->c1->vintm, cid->cmullr, cid->cmul);
	    // Some implicit loops writing to binary.
	    for (int i = 0; i < 16; i++) {
	      double value;
	      value = real(cid->c1->vintm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	      value = imag(cid->c1->vintm[i]);
	      vtppoanp->append_line(VirtualBinaryLine(value));
	    }
	    for (int i = 0; i < 4; i++) {
	      for (int j = 0; j < 4; j++) {
		double value = cid->cmul[i][j];
		vtppoanp->append_line(VirtualBinaryLine(value));
	      }
	    }
	    // label 318
	    for (int i = 0; i < 4; i++) {
	      oindex = 16 * (jindex - 1) + 4 * i; // if IAVM fails, try adding directions
+6 −0
Original line number Diff line number Diff line
@@ -35,6 +35,12 @@
#include <cstdio>
#include <string>

#ifdef USE_MPI
#ifndef MPI_VERSION
#include <mpi.h>
#endif
#endif

#ifndef INCLUDE_TYPES_H_
#include "../include/types.h"
#endif
+11 −5
Original line number Diff line number Diff line
@@ -586,8 +586,10 @@ ScattererConfiguration::ScattererConfiguration(const ScattererConfiguration& rhs
  for (int si = 0; si < _configurations; si++) {
    _radii_of_spheres[si] = rhs._radii_of_spheres[si];
    _nshl_vec[si] = rhs._nshl_vec[si];
    _rcf[si] = new double[_nshl_vec[si]]();
    for (int sj = 0; sj < _nshl_vec[si]; sj++) _rcf[si][sj] = rhs._rcf[si][sj];
    int expected_layers = _nshl_vec[si];
    if (si == 0 && _use_external_sphere) expected_layers++;
    _rcf[si] = new double[expected_layers]();
    for (int sj = 0; sj < expected_layers; sj++) _rcf[si][sj] = rhs._rcf[si][sj];
  }
  for (int li = 0; li < _max_layers; li++) {
    _dc0_matrix[li] = new dcomplex*[_number_of_spheres];
@@ -630,8 +632,10 @@ ScattererConfiguration::ScattererConfiguration(const mixMPI *mpidata)
  MPI_Bcast(_scale_vec, _number_of_scales, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  int dim3 = (_idfc == 0) ? _number_of_scales : 1;
  for (int si = 0; si < _configurations; si++) {
    _rcf[si] = new double[_nshl_vec[si]]();
    MPI_Bcast(_rcf[si], _nshl_vec[si], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    int expected_layers = _nshl_vec[si];
    if (si == 0 && _use_external_sphere) expected_layers++;
    _rcf[si] = new double[expected_layers]();
    MPI_Bcast(_rcf[si], expected_layers, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  }
  for (int li = 0; li < _max_layers; li++) {
    _dc0_matrix[li] = new dcomplex*[_number_of_spheres];
@@ -665,7 +669,9 @@ void ScattererConfiguration::mpibcast(const mixMPI *mpidata) {
  MPI_Bcast(_scale_vec, _number_of_scales, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  int dim3 = (_idfc == 0) ? _number_of_scales : 1;
  for (int si = 0; si < _configurations; si++) {
    MPI_Bcast(_rcf[si], _nshl_vec[si], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    int expected_layers = _nshl_vec[si];
    if (si == 0 && _use_external_sphere) expected_layers++;
    MPI_Bcast(_rcf[si], expected_layers, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  }
  for (int li = 0; li < _max_layers; li++) {
    for (int lj = 0; lj < _number_of_spheres; lj++) {
+19 −1
Original line number Diff line number Diff line
@@ -64,9 +64,13 @@ void zinvert(dcomplex **mat, np_int n, int &jer, const RuntimeSettings& rs) {
  char buffer[128];
  string message;
  lapack_int info, inc1 = 1;
  lcomplex *arr = &(mat[0][0]);
  lcomplex *arr = (lcomplex *)mat[0];
  lcomplex *arr_orig;
#ifndef USE_MKL
  lcomplex lapack_one = 1.0 + I * 0.0;
#else
  lcomplex lapack_one = {1.0, 0.0};
#endif // USE_MKL
  np_int nn = n * n;
  if (rs.use_refinement && rs.invert_mode != RuntimeSettings::INV_MODE_RBT) {
    lapack_int inc1 = 1;
@@ -131,9 +135,15 @@ lapack_int lapack_newton(
  char buffer[128];
  char lapackNoTrans = 'N';
  const int max_ref_iters = rs.max_ref_iters;
#ifndef USE_MKL
  lcomplex lapack_zero = 0.0 + I * 0.0;
  lcomplex lapack_one = 1.0 + I * 0.0;
  lcomplex lapack_mone = -1.0 + I * 0.0;
#else
  lcomplex lapack_zero = {0.0, 0.0};
  lcomplex lapack_one = {1.0, 0.0};
  lcomplex lapack_mone {-1.0, 0.0};
#endif // USE_MKL
  lapack_int mm = m * m;
  lapack_int incx, incy;
  lcomplex *ax, *r, *unrefined;
@@ -149,7 +159,11 @@ lapack_int lapack_newton(
  incx = 1;
  lapack_int maxindex = izamax_(&mm, a, &incx) - 1;
  lcomplex lapackmax = a[maxindex];
#ifndef USE_MKL
  curmax = cabs(lapackmax);
#else
  curmax = std::sqrt(lapackmax.real * lapackmax.real + lapackmax.imag * lapackmax.imag);
#endif // USE_MKL
  sprintf(buffer, "INFO: largest matrix value has modulus %.5le.\n", curmax);
  message = buffer;
  rs.logger->log(message);
@@ -166,7 +180,11 @@ lapack_int lapack_newton(
    zaxpy_(&m, &lapack_one, id_diag, &incx, ax, &incy);
    maxindex = izamax_(&mm, ax, &incx) - 1;
    lapackmax = ax[maxindex];
#ifndef USE_MKL
    curmax = cabs(lapackmax);
#else
    curmax = std::sqrt(lapackmax.real * lapackmax.real + lapackmax.imag * lapackmax.imag);
#endif  // USE_MKL
    sprintf(buffer, "DEBUG: iteration %d has residue %.5le; target residue is %.5le.\n", ri, curmax, rs.accuracy_goal);
    message = buffer;
    rs.logger->log(message, LOG_DEBG);