Commit 97917f8c authored by Giovanni La Mura
Browse files

Prepare a version that compiles and uses basic offload

parent 5629fbb4
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ NVTXFLAGS=""
OMPMODE="auto"
OFFLOAD="auto"
OFFLOADFLAGS=""
OFFLOADLDFLAGS=""
# End of default configuration settings

# Function declarations
@@ -875,7 +876,7 @@ int main(int argc, char** argv) {
  return 0;
}
EOF
    $CXX -fopenmp -fcf-protection=none -fno-stack-protector -foffload=nvptx-none="-O3 -ggdb -fcf-protection=none -fno-stack-protector -fopt-info -lm -latomic -lgomp" conf_test_offload.cpp -o conf_test_offload > /dev/null 2>>error.log
    $CXX -fopenmp -fno-strict-aliasing -foffload=nvptx-none="-O2 -march=sm_70 -mptx=7.3" conf_test_offload.cpp -o conf_test_offload > /dev/null 2>>error.log
    result=$?
    rm conf_test_offload.cpp
    if [ "x$result" = "x0" ]; then
@@ -886,7 +887,8 @@ EOF
    if [ "x$result" = "x0" ]; then
	echo "yes"
	echo "yes" >>configure.log
	OFFLOADFLAGS=" -DUSE_TARGET_OFFLOAD -fno-lto -fcf-protection=none -fno-stack-protector -foffload=nvptx-none=\"-O${CXX_OPT}${CXX_DBG} -fno-lto -fcf-protection=none -fno-stack-protector -fopt-info -lm -latomic -lgomp\""
	OFFLOADFLAGS=" -DUSE_TARGET_OFFLOAD -fno-strict-aliasing -foffload=nvptx-none=\"-O2 -march=sm_70 -mptx=7.3\""
	OFFLOADLDFLAGS=" -lgomp"
	if [ "x${OMPFLAGS}" = "x" ]; then
	    OFFLOADFLAGS="-fopenmp ${OFFLOADFLAGS}"
	fi
@@ -894,9 +896,11 @@ EOF
	echo "no"
	echo "no" >>configure.log
	OFFLOADFLAGS=""
	OFFLOADLDFLAGS=""
    fi
else
    OFFLOADFLAGS=""
    OFFLOADLDFLAGS=""
fi
# End of offload checks
if [ "x$CXXFLAGS" = "x" ]; then
@@ -904,9 +908,9 @@ if [ "x$CXXFLAGS" = "x" ]; then
fi
if [ "x$CXXLDFLAGS" = "x" ]; then
    if [ "x$LIBMODE" = "xstatic" ]; then
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}"
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}${OFFLOADLDFLAGS}"
    else
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}"
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}${OFFLOADLDFLAGS}"
    fi
fi

+27 −12
Original line number Diff line number Diff line
@@ -44,6 +44,10 @@
#include <cuda_runtime.h>
#endif

#ifdef USE_MAGMA
#include "magma_v2.h"
#endif

#ifndef INCLUDE_TYPES_H_
#include "../include/types.h"
#endif
@@ -100,6 +104,14 @@
#include "../include/IterationData.h"
#endif

#ifndef INCLUDE_COMMONS_H_
#include "../include/Commons.h"
#endif

#ifndef INCLUDE_MAGMA_CALLS_H_
#include "../include/magma_calls.h"
#endif

using namespace std;

/*! \brief Main calculation loop.
@@ -150,6 +162,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
  const magma_int_t d_array_max_size = 32; // TEMPORARY: can become configurable parameter
  magma_device_t *device_array = new magma_device_t[d_array_max_size];
  magma_int_t num_devices;
  cudaDeviceSetLimit(cudaLimitStackSize, 4096);
  magma_getdevices(device_array, d_array_max_size, &num_devices);
  device_count = (int)num_devices;
  delete[] device_array;
@@ -872,11 +885,13 @@ int cluster_jxi488_cycle(
  outam0->write_to_disk(outam0_name);
  delete outam0;
#endif // DEBUG_AM
  if (rs.use_offload) {
    cms_gpu(cid->am, cid->c1);
  } else {
    cms(cid->am, cid->c1);
  }
#ifdef USE_TARGET_OFFLOAD
#ifdef USE_MAGMA
  magmaDoubleComplex* vec_am = (magmaDoubleComplex *)(cid->am[0]);
  magma_cms(vec_am, cid->c1, cid->proc_device);
#endif //USE_MAGMA
#endif // USE_TARGET_OFFLOAD
  cms_gpu(cid->am[0], cid->c1);
#ifdef DEBUG_AM
  VirtualAsciiFile *outam1 = new VirtualAsciiFile();
  string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt";
@@ -889,10 +904,10 @@ int cluster_jxi488_cycle(
  write_dcomplex_matrix(outam1, cid->am, ndit, ndit, " %5d %5d (%17.8lE,%17.8lE)\n", 1);
  outam1->write_to_disk(outam1_name);
  delete outam1;
#endif
#endif // DEBUG_AM
#ifdef USE_NVTX
  nvtxRangePop();
#endif
#endif // USE_NVTX
  interval_end = chrono::high_resolution_clock::now();
  elapsed = interval_end - interval_start;
  message = "INFO: matrix calculation for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
@@ -900,7 +915,7 @@ int cluster_jxi488_cycle(
  interval_start = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
  nvtxRangePush("Invert the matrix");
#endif
#endif // USE_NVTX
  invert_matrix(cid->am, ndit, jer, output_path, jxi488, mxndm, cid->proc_device, rs);
#ifdef DEBUG_AM
  VirtualAsciiFile *outam2 = new VirtualAsciiFile();
@@ -914,10 +929,10 @@ int cluster_jxi488_cycle(
  write_dcomplex_matrix(outam2, cid->am, ndit, ndit);
  outam2->write_to_disk(outam2_name);
  delete outam2;
#endif
#endif // DEBUG_AM
#ifdef USE_NVTX
  nvtxRangePop();
#endif
#endif // USE_NVTX
  interval_end = chrono::high_resolution_clock::now();
  elapsed = interval_end - interval_start;
  message = "INFO: matrix inversion for scale " + to_string(jxi488) + " took " + to_string(elapsed.count()) + "s.\n";
@@ -931,7 +946,7 @@ int cluster_jxi488_cycle(
  interval_start = chrono::high_resolution_clock::now();
#ifdef USE_NVTX
  nvtxRangePush("Average calculation");
#endif
#endif // USE_NVTX
  ztm(cid->am, cid->c1);
#ifdef DEBUG_AM
  VirtualAsciiFile *outam3 = new VirtualAsciiFile();
@@ -945,7 +960,7 @@ int cluster_jxi488_cycle(
  write_dcomplex_matrix(outam3, cid->am, ndit, ndit);
  outam3->write_to_disk(outam3_name);
  delete outam3;
#endif
#endif // DEBUG_AM
  if (idfc >= 0) {
    if (jxi488 == jwtm) {
      int nlemt = 2 * cid->c1->nlem;
+1 −1
Original line number Diff line number Diff line
@@ -119,7 +119,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1);
 * \param am: `complex double *`
 * \param c1: `ParticleDescriptor *`
 */
void cms_gpu(dcomplex **am, ParticleDescriptor *c1);
void cms_gpu(dcomplex *am, ParticleDescriptor *c1);
#endif // USE_TARGET_OFFLOAD

/**
+2 −2
Original line number Diff line number Diff line
@@ -21,11 +21,11 @@
 *
 */

#include <string>

#ifndef INCLUDE_MAGMA_CALLS_H_
#define INCLUDE_MAGMA_CALLS_H_

/**
 * \brief MAGMA-based routine called from `cluster_jxi488_cycle()` when both
 * `USE_TARGET_OFFLOAD` and `USE_MAGMA` are defined.
 *
 * NOTE(review): presumably the MAGMA counterpart of `cms()` / `cms_gpu()`;
 * confirm against the implementation.
 *
 * \param vec_am: `magmaDoubleComplex *` The visible caller passes `cid->am[0]`
 * cast to this type.
 * \param c1: `ParticleDescriptor *`
 * \param device_id: `int` The visible caller passes `cid->proc_device`
 * (presumably the ID of the device to run on; TODO confirm).
 * \return result: `magma_int_t` Presumably a MAGMA status code; the visible
 * caller ignores it (TODO confirm).
 */
magma_int_t magma_cms(magmaDoubleComplex *vec_am, ParticleDescriptor *c1, int device_id);

/**
 * \brief Invert a complex matrix with double precision elements.
 *
+4 −0
Original line number Diff line number Diff line
@@ -41,6 +41,10 @@ using namespace std;
#endif

#ifdef USE_MAGMA
#ifndef INCLUDE_COMMONS_H_
#include "../include/Commons.h"
#endif

#ifndef INCLUDE_MAGMA_CALLS_H_
#include "../include/magma_calls.h"
#endif
Loading