Loading src/cluster/cluster.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -104,12 +104,12 @@ void cluster(const string& config_file, const string& data_file, const string& o FILE *timing_file = fopen(timing_name.c_str(), "w"); Logger *time_logger = new Logger(LOG_DEBG, timing_file); Logger *logger = new Logger(LOG_DEBG); int device_count = 0; //=========== // Initialise MAGMA //=========== #ifdef USE_MAGMA int device_count; cudaGetDeviceCount(&device_count); logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG); logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n"); Loading Loading @@ -188,7 +188,7 @@ void cluster(const string& config_file, const string& data_file, const string& o // in any case, replace all sprintf() with snprintf(), to avoid in any case writing more than the available buffer size char virtual_line[256]; // Create and initialise pristine cid for MPI proc 0 and thread 0 ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata); ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata, device_count); const int ndi = cid->c4->nsph * cid->c4->nlim; np_int ndit = 2 * ndi; logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n"); Loading Loading @@ -542,7 +542,7 @@ void cluster(const string& config_file, const string& data_file, const string& o // copy gconf, sconf, cid and p_scattering_angles from MPI process 0 GeometryConfiguration *gconf = new GeometryConfiguration(mpidata); ScattererConfiguration *sconf = new ScattererConfiguration(mpidata); ClusterIterationData *cid = new ClusterIterationData(mpidata); ClusterIterationData *cid = new ClusterIterationData(mpidata, device_count); ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata); // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled Loading Loading @@ -768,7 +768,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf #ifdef USE_NVTX nvtxRangePush("Invert the matrix"); #endif invert_matrix(cid->am, ndit, jer, mxndm); invert_matrix(cid->am, ndit, jer, mxndm, cid->proc_device); #ifdef USE_NVTX nvtxRangePop(); #endif Loading src/include/Commons.h +4 −2 Original line number Diff line number Diff line Loading @@ -614,13 +614,15 @@ public: int xiblock; int firstxi; int lastxi; //! \brief ID of the GPU used by one MPI process. int proc_device; ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata); ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count); ClusterIterationData(const ClusterIterationData& rhs); #ifdef MPI_VERSION ClusterIterationData(const mixMPI *mpidata); ClusterIterationData(const mixMPI *mpidata, const int device_count); /*! \brief Broadcast over MPI the ClusterIterationData instance from MPI process 0 to all others. * Loading src/libnptm/Commons.cpp +16 −2 Original line number Diff line number Diff line Loading @@ -955,7 +955,7 @@ mixMPI::mixMPI(const mixMPI& rhs) { mixMPI::~mixMPI() { } ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata) { ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) { c1 = new C1(gconf, sconf); c2 = new C2(gconf, sconf); c3 = new C3(); Loading Loading @@ -1054,6 +1054,12 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter lastxi = ((mpidata->rank+1) * xiblock)+1; firstxi = lastxi-xiblock+1; if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales; #ifdef USE_MAGMA proc_device = mpidata->rank % device_count; #else proc_device = 0; #endif } ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) { Loading Loading @@ -1202,10 +1208,12 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) { lastxi = rhs.lastxi; xiblock = rhs.xiblock; number_of_scales = rhs.number_of_scales; proc_device = rhs.proc_device; } #ifdef MPI_VERSION ClusterIterationData::ClusterIterationData(const mixMPI *mpidata) { ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int device_count) { c1 = new C1(mpidata); c2 = new C2(mpidata); c3 = new C3(mpidata); Loading Loading @@ -1336,6 +1344,12 @@ ClusterIterationData::ClusterIterationData(const mixMPI *mpidata) { lastxi = ((mpidata->rank+1) * xiblock)+1; firstxi = lastxi-xiblock+1; if (lastxi > number_of_scales) lastxi = number_of_scales; #ifdef USE_MAGMA proc_device = mpidata->rank % device_count; #else proc_device = 0; #endif } void ClusterIterationData::mpibcast(const mixMPI *mpidata) { Loading Loading
src/cluster/cluster.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -104,12 +104,12 @@ void cluster(const string& config_file, const string& data_file, const string& o FILE *timing_file = fopen(timing_name.c_str(), "w"); Logger *time_logger = new Logger(LOG_DEBG, timing_file); Logger *logger = new Logger(LOG_DEBG); int device_count = 0; //=========== // Initialise MAGMA //=========== #ifdef USE_MAGMA int device_count; cudaGetDeviceCount(&device_count); logger->log("DEBUG: Proc-" + to_string(mpidata->rank) + " found " + to_string(device_count) + " CUDA devices.\n", LOG_DEBG); logger->log("INFO: Process " + to_string(mpidata->rank) + " initializes MAGMA.\n"); Loading Loading @@ -188,7 +188,7 @@ void cluster(const string& config_file, const string& data_file, const string& o // in any case, replace all sprintf() with snprintf(), to avoid in any case writing more than the available buffer size char virtual_line[256]; // Create and initialise pristine cid for MPI proc 0 and thread 0 ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata); ClusterIterationData *cid = new ClusterIterationData(gconf, sconf, mpidata, device_count); const int ndi = cid->c4->nsph * cid->c4->nlim; np_int ndit = 2 * ndi; logger->log("INFO: Size of matrices to invert: " + to_string((int64_t)ndit) + " x " + to_string((int64_t)ndit) +".\n"); Loading Loading @@ -542,7 +542,7 @@ void cluster(const string& config_file, const string& data_file, const string& o // copy gconf, sconf, cid and p_scattering_angles from MPI process 0 GeometryConfiguration *gconf = new GeometryConfiguration(mpidata); ScattererConfiguration *sconf = new ScattererConfiguration(mpidata); ClusterIterationData *cid = new ClusterIterationData(mpidata); ClusterIterationData *cid = new ClusterIterationData(mpidata, device_count); ScatteringAngles *p_scattering_angles = new ScatteringAngles(mpidata); // Create this variable and initialise it with a default here, so that it is defined anyway, with or without OpenMP support enabled Loading Loading @@ -768,7 +768,7 @@ int cluster_jxi488_cycle(int jxi488, ScattererConfiguration *sconf, GeometryConf #ifdef USE_NVTX nvtxRangePush("Invert the matrix"); #endif invert_matrix(cid->am, ndit, jer, mxndm); invert_matrix(cid->am, ndit, jer, mxndm, cid->proc_device); #ifdef USE_NVTX nvtxRangePop(); #endif Loading
src/include/Commons.h +4 −2 Original line number Diff line number Diff line Loading @@ -614,13 +614,15 @@ public: int xiblock; int firstxi; int lastxi; //! \brief ID of the GPU used by one MPI process. int proc_device; ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata); ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count); ClusterIterationData(const ClusterIterationData& rhs); #ifdef MPI_VERSION ClusterIterationData(const mixMPI *mpidata); ClusterIterationData(const mixMPI *mpidata, const int device_count); /*! \brief Broadcast over MPI the ClusterIterationData instance from MPI process 0 to all others. * Loading
src/libnptm/Commons.cpp +16 −2 Original line number Diff line number Diff line Loading @@ -955,7 +955,7 @@ mixMPI::mixMPI(const mixMPI& rhs) { mixMPI::~mixMPI() { } ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata) { ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, ScattererConfiguration *sconf, const mixMPI *mpidata, const int device_count) { c1 = new C1(gconf, sconf); c2 = new C2(gconf, sconf); c3 = new C3(); Loading Loading @@ -1054,6 +1054,12 @@ ClusterIterationData::ClusterIterationData(GeometryConfiguration *gconf, Scatter lastxi = ((mpidata->rank+1) * xiblock)+1; firstxi = lastxi-xiblock+1; if (lastxi > sconf->number_of_scales) lastxi = sconf->number_of_scales; #ifdef USE_MAGMA proc_device = mpidata->rank % device_count; #else proc_device = 0; #endif } ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) { Loading Loading @@ -1202,10 +1208,12 @@ ClusterIterationData::ClusterIterationData(const ClusterIterationData& rhs) { lastxi = rhs.lastxi; xiblock = rhs.xiblock; number_of_scales = rhs.number_of_scales; proc_device = rhs.proc_device; } #ifdef MPI_VERSION ClusterIterationData::ClusterIterationData(const mixMPI *mpidata) { ClusterIterationData::ClusterIterationData(const mixMPI *mpidata, const int device_count) { c1 = new C1(mpidata); c2 = new C2(mpidata); c3 = new C3(mpidata); Loading Loading @@ -1336,6 +1344,12 @@ ClusterIterationData::ClusterIterationData(const mixMPI *mpidata) { lastxi = ((mpidata->rank+1) * xiblock)+1; firstxi = lastxi-xiblock+1; if (lastxi > number_of_scales) lastxi = number_of_scales; #ifdef USE_MAGMA proc_device = mpidata->rank % device_count; #else proc_device = 0; #endif } void ClusterIterationData::mpibcast(const mixMPI *mpidata) { Loading