Commit 4ade768e authored by Mulas, Giacomo
Browse files

- fix configure to properly detect cuBLAS even when no additional compilation flags (CUDAFLAGS) are needed

- accept an accuracy as poor as 0.1 (previously 0.01) in refinement, if no better one can be achieved
parent 35cd635f
Loading
Loading
Loading
Loading
+4 −10
Original line number Diff line number Diff line
@@ -25505,13 +25505,10 @@ then :
        fi
      fi # end of pkg-config decision tree
    fi # end of CUDAFLAGS user override protection
    if test "x$CUDAFLAGS" != "x"; then
      # somehow CUDAFLAGS was defined
    if test "x $CUDAFLAGS $CUDALDFLAGS" != "x"; then
      # somehow CUDAFLAGS or CUDALDFLAGS was defined
      export CUDAFLAGS
      export CUBLASFLAGS="-DUSE_CUBLAS ${CUDAFLAGS}"
    fi
    if test "x$CUDALDFLAGS" != "x"; then
      # somehow CUDALDFLAGS was defined
      export CUDALDFLAGS
      export CUBLASLDFLAGS="${CUDALDFLAGS}"
    fi
@@ -25583,13 +25580,10 @@ else case e in #(
        fi
      fi # end of pkg-config decision tree
    fi # end of CUDAFLAGS user override protection
    if test "x$CUDAFLAGS" != "x"; then
      # somehow CUDAFLAGS was defined
    if test "x $CUDAFLAGS $CUDALDFLAGS" != "x"; then
      # somehow CUDAFLAGS or CUDALDFLAGS was defined
      export CUDAFLAGS
      export CUBLASFLAGS="-DUSE_CUBLAS ${CUDAFLAGS}"
    fi
    if test "x$CUDALDFLAGS" != "x"; then
      # somehow CUDALDFLAGS was defined
      export CUDALDFLAGS
      export CUBLASLDFLAGS="${CUDALDFLAGS}"
    fi
+2 −5
Original line number Diff line number Diff line
@@ -213,13 +213,10 @@ m4_define(
        fi
      fi # end of pkg-config decision tree
    fi # end of CUDAFLAGS user override protection
    if test "x$CUDAFLAGS" != "x"; then
      # somehow CUDAFLAGS was defined
    if test "x $CUDAFLAGS $CUDALDFLAGS" != "x"; then
      # somehow CUDAFLAGS or CUDALDFLAGS was defined
      export CUDAFLAGS
      export CUBLASFLAGS="-DUSE_CUBLAS ${CUDAFLAGS}"
    fi
    if test "x$CUDALDFLAGS" != "x"; then
      # somehow CUDALDFLAGS was defined
      export CUDALDFLAGS
      export CUBLASLDFLAGS="${CUDALDFLAGS}"
    fi
+14 −17
Original line number Diff line number Diff line
@@ -126,10 +126,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
  Logger *logger = new Logger(LOG_DEBG);
  int device_count = 0;

#ifdef USE_CUBLAS
  cudaGetDeviceCount(&device_count);
  logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG);
#elif defined USE_MAGMA
#ifdef USE_MAGMA
  //===========
  // Initialise MAGMA
  //===========
@@ -155,7 +152,11 @@ void cluster(const string& config_file, const string& data_file, const string& o
    delete logger;
    return;
  }
#endif // end MAGMA initialisation
// end MAGMA initialisation
#elif defined USE_CUBLAS
  cudaGetDeviceCount(&device_count);
  logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG);
#endif 

  //===========================
  // the following only happens on MPI process 0
@@ -297,10 +298,10 @@ void cluster(const string& config_file, const string& data_file, const string& o
      // Create empty virtual binary file
      VirtualBinaryFile *vtppoanp = new VirtualBinaryFile();
      string tppoan_name = output_path + "/c_TPPOAN";
#ifdef USE_CUBLAS
      logger->log("INFO: using CUBLAS calls.\n", LOG_INFO);
#elif defined USE_MAGMA
#ifdef USE_MAGMA
      logger->log("INFO: using MAGMA calls.\n", LOG_INFO);
#elif defined USE_CUBLAS
      logger->log("INFO: using CUBLAS calls.\n", LOG_INFO);
#elif defined USE_LAPACK
      logger->log("INFO: using LAPACK calls.\n", LOG_INFO);
#else
@@ -564,9 +565,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
      
    delete sconf;
    delete gconf;
#ifdef USE_CUBLAS
    // just a placeholder to skip magma finalisation if we are using cublas
#elif defined USE_MAGMA
#ifdef USE_MAGMA
    logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n");
    magma_finalize();
#endif
@@ -689,9 +688,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
    delete sconf;
    delete gconf;
#endif
#ifdef USE_CUBLAS
    // placeholder to avoid magma if using cublas
#elif defined USE_MAGMA
#ifdef USE_MAGMA
    logger->log("INFO: Process " + to_string(mpidata->rank) + " finalizes MAGMA.\n");
    magma_finalize();
#endif
@@ -842,7 +839,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
#ifdef USE_NVTX
  nvtxRangePush("Invert the matrix");
#endif
  // we the accuracygoal in, get the actual accuracy back out
  // we put the accuracygoal in, get the actual accuracy back out
  double actualaccuracy = cid->accuracygoal;
  invert_matrix(cid->am, ndit, jer, cid->maxrefiters, actualaccuracy, cid->refinemode, mxndm, cid->proc_device);
  // in principle, we should check whether the returned actualaccuracy is indeed lower than the accuracygoal, and do something about it if not
@@ -850,8 +847,8 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
  if (cid->refinemode==2) {
    message = "INFO: calibration obtained accuracy " + to_string(actualaccuracy) + " (" + to_string(cid->accuracygoal) + " requested) in " + to_string(cid->maxrefiters) + " refinement iterations\n";
    logger->log(message);
    if (actualaccuracy > 1e-2) {
      printf("Accuracy worse than 0.01, stopping");
    if (actualaccuracy > 1e-1) {
      printf("Accuracy worse than 0.1, stopping");
      exit(1);
    }
  }
+7 −7
Original line number Diff line number Diff line
@@ -60,13 +60,7 @@ using namespace std;

void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, double &accuracygoal, int refinemode, np_int max_size, int target_device) {
  ier = 0;
#ifdef USE_CUBLAS
#ifdef USE_REFINEMENT
  cublas_zinvert_and_refine(mat, size, maxrefiters, accuracygoal, refinemode, target_device);
#else
  cublas_zinvert(mat, size, target_device);
#endif
#elif defined USE_MAGMA
#ifdef USE_MAGMA
#ifdef USE_REFINEMENT
  // try using the iterative refinement to obtain a more accurate solution
  // we pass to magma_zinvert_and_refine() the accuracygoal in, get the actual
@@ -75,6 +69,12 @@ void invert_matrix(dcomplex **mat, np_int size, int &ier, int &maxrefiters, doub
#else
  magma_zinvert(mat, size, ier, target_device);
#endif  
#elif defined USE_CUBLAS
#ifdef USE_REFINEMENT
  cublas_zinvert_and_refine(mat, size, maxrefiters, accuracygoal, refinemode, target_device);
#else
  cublas_zinvert(mat, size, target_device);
#endif
#elif defined USE_LAPACK
#ifdef USE_REFINEMENT
  zinvert_and_refine(mat, size, ier, maxrefiters, accuracygoal, refinemode);