Commit 8b65bab6 authored by Mulas, Giacomo's avatar Mulas, Giacomo
Browse files

iterative refinement now checks for convergence only on first wavelength,...

iterative refinement now checks for convergence only on first wavelength, afterwards it assumes a fixed number of refinement iterations
parent 67e2b24a
Loading
Loading
Loading
Loading
+15 −1
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
  // Initialise MAGMA
  //===========
#ifdef USE_MAGMA
  // GMu note: MAGMA does not necessarily rely on CUDA, it may just as well run on openCL or HIP, we should consider alternative ways to detect the number of devices if MAGMA is not using CUDA but something else
  cudaGetDeviceCount(&device_count);
  logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG);
  logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n");
@@ -313,6 +314,12 @@ void cluster(const string& config_file, const string& data_file, const string& o

      // do the first iteration on jxi488 separately, since it seems to be different from the others
      int jxi488 = 1;
      // In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
      cid->refinemode = 2;
      // maxrefiters and accuracygoal should be configurable and preferably set somewhere else
      cid->maxrefiters = 20;
      int initialmaxrefiters = cid->maxrefiters;
      cid->accuracygoal = 1e-6;
      chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
      nvtxRangePush("First iteration");
@@ -338,6 +345,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
      message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
      logger->log(message);
      time_logger->log(message);
      /* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
      cid->refinemode = 0;
      /* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
      if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
      if (jer != 0) {
	// First loop failed. Halt the calculation.
	fclose(timing_file);
@@ -806,7 +817,10 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
#ifdef USE_NVTX
  nvtxRangePush("Invert the matrix");
#endif
  invert_matrix(cid->am, ndit, jer, mxndm, cid->proc_device);
  // we the accuracygoal in, get the actual accuracy back out
  double actualaccuracy = cid->accuracygoal;
  invert_matrix(cid->am, ndit, jer, cid->maxrefiters, actualaccuracy, cid->refinemode, mxndm, cid->proc_device);
  // in principle, we should check whether the returned actualaccuracy is indeed lower than the accuracygoal, and do something about it if not
#ifdef DEBUG_AM
  VirtualAsciiFile *outam2 = new VirtualAsciiFile();
  string outam2_name = output_path + "/c_AM2_JXI" + to_string(jxi488) + ".txt";
+3 −0
Original line number Diff line number Diff line
@@ -621,6 +621,9 @@ public:
  int lastxi;
  //! \brief ID of the GPU used by one MPI process.
  int proc_device;
  int refinemode;
  int maxrefiters;
  double accuracygoal;

  ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count);
  
+1 −1
Original line number Diff line number Diff line
@@ -38,6 +38,6 @@
 * optional, defaults to 0).
 * \param target_device: `int` ID of target GPU, if available (defaults to 0).
 */
void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size=0, int target_device=0);
void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, double &accuracygoal, int refinemode, np_int max_size=0, int target_device=0);

#endif
+1 −1
Original line number Diff line number Diff line
@@ -47,6 +47,6 @@ void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id=0);
 * \param accuracygoal: `double` Accuracy to achieve in iterative refinement, defined as the module of the maximum difference between the identity matrix and the matrix product of the (approximate) inverse times the original matrix. On return, it contains the actually achieved accuracy
 * \param device_id: `int` ID of the device for matrix inversion offloading.
 */
void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int maxrefiters, double &accuracygoal, int device_id);
void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxrefiters, double &accuracygoal, int refinemode, int device_id);

#endif
+5 −5
Original line number Diff line number Diff line
@@ -46,20 +46,20 @@ extern void lucin(dcomplex **mat, np_int max_size, np_int size, int &ier);

using namespace std;

void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size, int target_device) {
void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, double &accuracygoal, int refinemode, np_int max_size, int target_device) {
  ier = 0;
#ifdef USE_MAGMA
#ifdef USE_REFINEMENT
  // try using the iterative refinement to obtain a more accurate solution
  const int maxrefiters = 6;
  double accuracygoal = 1e-6;
  magma_zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, target_device);
  // we pass to magma_zinvert_and_refine() the accuracygoal in, get the actual
  // accuracy back out
  magma_zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, refinemode, target_device);
#elif
  magma_zinvert(mat, size, ier, target_device);
#endif  
#elif defined USE_LAPACK
#ifdef USE_REFINEMENT
  zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal);
  zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, refinemode);
#elif
  zinvert(mat, size, ier);
#endif
Loading