iterative refinement now checks for convergence only on first wavelength,... (8b65bab6) · Commits · Giacomo Mulas / NP_TMcode

src/cluster/cluster.cpp

+15 −1

Original line number	Diff line number	Diff line
		@@ -119,6 +119,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
		// Initialise MAGMA
		//===========
		#ifdef USE_MAGMA
		// GMu note: MAGMA does not necessarily rely on CUDA, it may just as well run on openCL or HIP, we should consider alternative ways to detect the number of devices if MAGMA is not using CUDA but something else
		cudaGetDeviceCount(&device_count);
		logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG);
		logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n");
		@@ -313,6 +314,12 @@ void cluster(const string& config_file, const string& data_file, const string& o

		// do the first iteration on jxi488 separately, since it seems to be different from the others
		int jxi488 = 1;
		// In the first iteration, if refinement is enabled, determine the number of refinement iterations required to arrive at the target accuracy (if achievable in a reasonable number of iterations)
		cid->refinemode = 2;
		// maxrefiters and accuracygoal should be configurable and preferably set somewhere else
		cid->maxrefiters = 20;
		int initialmaxrefiters = cid->maxrefiters;
		cid->accuracygoal = 1e-6;
		chrono::time_point<chrono::high_resolution_clock> start_iter_1 = chrono::high_resolution_clock::now();
		#ifdef USE_NVTX
		nvtxRangePush("First iteration");
		@@ -338,6 +345,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
		message = "INFO: First iteration took " + to_string(elapsed.count()) + "s.\n";
		logger->log(message);
		time_logger->log(message);
		/* for the next iterations, just always do maxiter iterations, assuming the accuracy is good enough */
		cid->refinemode = 0;
		/* add an extra iteration for margin, if this does not exceed initialmaxrefiters */
		if (cid->maxrefiters < initialmaxrefiters) cid->maxrefiters++;
		if (jer != 0) {
		// First loop failed. Halt the calculation.
		fclose(timing_file);
		@@ -806,7 +817,10 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
		#ifdef USE_NVTX
		nvtxRangePush("Invert the matrix");
		#endif
		invert_matrix(cid->am, ndit, jer, mxndm, cid->proc_device);
		// we the accuracygoal in, get the actual accuracy back out
		double actualaccuracy = cid->accuracygoal;
		invert_matrix(cid->am, ndit, jer, cid->maxrefiters, actualaccuracy, cid->refinemode, mxndm, cid->proc_device);
		// in principle, we should check whether the returned actualaccuracy is indeed lower than the accuracygoal, and do something about it if not
		#ifdef DEBUG_AM
		VirtualAsciiFile *outam2 = new VirtualAsciiFile();
		string outam2_name = output_path + "/c_AM2_JXI" + to_string(jxi488) + ".txt";

src/include/Commons.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -621,6 +621,9 @@ public:
		int lastxi;
		//! \brief ID of the GPU used by one MPI process.
		int proc_device;
		int refinemode;
		int maxrefiters;
		double accuracygoal;

		ClusterIterationData(GeometryConfiguration gconf, ScattererConfiguration sconf, const mixMPI *mpidata, const int device_count);

src/include/algebraic.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -38,6 +38,6 @@
		* optional, defaults to 0).
		* \param target_device: `int` ID of target GPU, if available (defaults to 0).
		*/
		void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size=0, int target_device=0);
		void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, double &accuracygoal, int refinemode, np_int max_size=0, int target_device=0);

		#endif

src/include/magma_calls.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -47,6 +47,6 @@ void magma_zinvert(dcomplex **mat, np_int n, int &jer, int device_id=0);
		* \param accuracygoal: `double` Accuracy to achieve in iterative refinement, defined as the module of the maximum difference between the identity matrix and the matrix product of the (approximate) inverse times the original matrix. On return, it contains the actually achieved accuracy
		* \param device_id: `int` ID of the device for matrix inversion offloading.
		*/
		void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int maxrefiters, double &accuracygoal, int device_id);
		void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxrefiters, double &accuracygoal, int refinemode, int device_id);

		#endif

src/libnptm/algebraic.cpp

+5 −5

Original line number	Diff line number	Diff line
		@@ -46,20 +46,20 @@ extern void lucin(dcomplex **mat, np_int max_size, np_int size, int &ier);

		using namespace std;

		void invert_matrix(dcomplex **mat, np_int size, int &ier, np_int max_size, int target_device) {
		void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, double &accuracygoal, int refinemode, np_int max_size, int target_device) {
		ier = 0;
		#ifdef USE_MAGMA
		#ifdef USE_REFINEMENT
		// try using the iterative refinement to obtain a more accurate solution
		const int maxrefiters = 6;
		double accuracygoal = 1e-6;
		magma_zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, target_device);
		// we pass to magma_zinvert_and_refine() the accuracygoal in, get the actual
		// accuracy back out
		magma_zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, refinemode, target_device);
		#elif
		magma_zinvert(mat, size, ier, target_device);
		#endif
		#elif defined USE_LAPACK
		#ifdef USE_REFINEMENT
		zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal);
		zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, refinemode);
		#elif
		zinvert(mat, size, ier);
		#endif