Commit c84dc8c5 authored by Giovanni La Mura
Browse files

Associate multiple GPUs to processes according to MPI rank

parent 0c307b8c
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -104,12 +104,12 @@ void cluster(const string& config_file, const string& data_file, const string& o
  FILE *timing_file = fopen(timing_name.c_str(), "w");
  Logger *time_logger = new Logger(LOG_DEBG, timing_file);
  Logger *logger = new Logger(LOG_DEBG);
  int device_count = 0;

  //===========
  // Initialise MAGMA
  //===========
#ifdef USE_MAGMA
  int device_count;
  cudaGetDeviceCount(&device_count);
  logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG);
  logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n");
@@ -188,7 +188,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
      // in any case, replace all sprintf() with snprintf(), to avoid in any case writing more than the available buffer size
      char virtual_line[256];
      // Create and initialise pristine cid for MPI proc 0 and thread 0
      ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata);
      ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata, device_count);
      const int ndi = cid->c4->nsph * cid->c4->nlim;
      np_int ndit = 2 * ndi;
      logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n");
@@ -542,7 +542,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
    // copy gconf, sconf, cid and p_scattering_angles from MPI process 0
    GeometryConfiguration *gconf = new GeometryConfiguration(mpidata);
    ScattererConfiguration *sconf = new ScattererConfiguration(mpidata);
    ClusterIterationData *cid = new ClusterIterationData(mpidata);
    ClusterIterationData *cid = new ClusterIterationData(mpidata, device_count);
    ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata);

    // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled
@@ -768,7 +768,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf
#ifdef USE_NVTX
  nvtxRangePush("Invert the matrix");
#endif
  invert_matrix(cid->am, ndit, jer, mxndm);
  invert_matrix(cid->am, ndit, jer, mxndm, cid->proc_device);
#ifdef USE_NVTX
  nvtxRangePop();
#endif
+4 −2
Original line number Diff line number Diff line
@@ -614,13 +614,15 @@ public:
  int xiblock;
  int firstxi;
  int lastxi;
  //! \brief ID of the GPU used by one MPI process.
  int proc_device;

  ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata);
  ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count);
  
  ClusterIterationData(const ClusterIterationData& rhs);

#ifdef MPI_VERSION
  ClusterIterationData(const mixMPI *mpidata);
  ClusterIterationData(const mixMPI *mpidata, const int device_count);

  /*! \brief Broadcast over MPI the ClusterIterationData instance from MPI process 0 to all others.
   *
+16 −2
Original line number Diff line number Diff line
@@ -955,7 +955,7 @@ mixMPI::mixMPI(const mixMPI& rhs) {
mixMPI::~mixMPI() {
}

ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata) {
ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) {
  c1 = new C1(gconf, sconf);
  c2 = new C2(gconf, sconf);
  c3 = new C3();
@@ -1054,6 +1054,12 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter
  lastxi = ((mpidata->rank+1) * xiblock)+1;
  firstxi = lastxi-xiblock+1;
  if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales;

#ifdef USE_MAGMA
  proc_device = mpidata->rank % device_count;
#else
  proc_device = 0;
#endif
}

ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
@@ -1202,10 +1208,12 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) {
  lastxi = rhs.lastxi;
  xiblock = rhs.xiblock;
  number_of_scales = rhs.number_of_scales;

  proc_device = rhs.proc_device;
}

#ifdef MPI_VERSION
ClusterIterationData::ClusterIterationData(const mixMPI *mpidata) {
ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int device_count) {
  c1 = new C1(mpidata);
  c2 = new C2(mpidata);
  c3 = new C3(mpidata);
@@ -1336,6 +1344,12 @@ ClusterIterationData::ClusterIterationData(const mixMPI *mpidata) {
  lastxi = ((mpidata->rank+1) * xiblock)+1;
  firstxi = lastxi-xiblock+1;
  if (lastxi > number_of_scales) lastxi = number_of_scales;

#ifdef USE_MAGMA
  proc_device = mpidata->rank % device_count;
#else
  proc_device = 0;
#endif
}

void ClusterIterationData::mpibcast(const mixMPI *mpidata) {