Commit 4dd3b91d authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Merge branch 'test_SVD' into 'master'

Offload matrix calculation for CLUSTER

See merge request giacomo.mulas/np_tmcode!110
parents 52b6a488 5ba7bf6e
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ NVTXFLAGS=""
OMPMODE="auto"
OFFLOAD="auto"
OFFLOADFLAGS=""
OFFLOADLDFLAGS=""
# End of default configuration settings

# Function declarations
@@ -875,7 +876,7 @@ int main(int argc, char** argv) {
  return 0;
}
EOF
    $CXX -fopenmp -fcf-protection=none -fno-stack-protector -foffload=nvptx-none="-O3 -ggdb -fcf-protection=none -fno-stack-protector -fopt-info -lm -latomic -lgomp" conf_test_offload.cpp -o conf_test_offload > /dev/null 2>>error.log
    $CXX -fopenmp -fno-strict-aliasing -foffload=nvptx-none="-O2 -march=sm_70 -mptx=7.3" conf_test_offload.cpp -o conf_test_offload > /dev/null 2>>error.log
    result=$?
    rm conf_test_offload.cpp
    if [ "x$result" = "x0" ]; then
@@ -886,7 +887,8 @@ EOF
    if [ "x$result" = "x0" ]; then
	echo "yes"
	echo "yes" >>configure.log
	OFFLOADFLAGS=" -DUSE_TARGET_OFFLOAD -fcf-protection=none -fno-stack-protector -foffload=nvptx-none=\"-O${CXX_OPT}${CXX_DBG} -fcf-protection=none -fno-stack-protector -fopt-info -lm -latomic -lgomp\""
	OFFLOADFLAGS=" -DUSE_TARGET_OFFLOAD -fno-strict-aliasing -foffload=nvptx-none=\"-O2 -march=sm_70 -mptx=7.3 -lm\""
	OFFLOADLDFLAGS=" -lgomp"
	if [ "x${OMPFLAGS}" = "x" ]; then
	    OFFLOADFLAGS="-fopenmp ${OFFLOADFLAGS}"
	fi
@@ -894,9 +896,11 @@ EOF
	echo "no"
	echo "no" >>configure.log
	OFFLOADFLAGS=""
	OFFLOADLDFLAGS=""
    fi
else
    OFFLOADFLAGS=""
    OFFLOADLDFLAGS=""
fi
# End of offload checks
if [ "x$CXXFLAGS" = "x" ]; then
@@ -904,9 +908,9 @@ if [ "x$CXXFLAGS" = "x" ]; then
fi
if [ "x$CXXLDFLAGS" = "x" ]; then
    if [ "x$LIBMODE" = "xstatic" ]; then
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}"
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}${OFFLOADLDFLAGS}"
    else
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}"
	CXXLDFLAGS="-Llibnptm -lnptm ${HDF5LDFLAGS} ${LDFLAGS} ${LAPACKLDFLAGS}${CUBLASLDFLAGS}${MAGMALDFLAGS}${OFFLOADLDFLAGS}"
    fi
fi

+177 −276

File changed.

Preview size limit exceeded, changes collapsed.

+8 −0
Original line number Diff line number Diff line
@@ -132,6 +132,8 @@ protected:
  double _accuracy_goal;
  //! \brief Maximum number of refinement iterations.
  int _ref_iters;
  //! \brief Flag to use target offload, if available.
  bool _offload_flag;

public:
  //! \brief Read-only view on number of spherical components.
@@ -196,6 +198,8 @@ public:
  const double& accuracy_goal = _accuracy_goal;
  //! \brief Read-only view on maximum number of refinement iterations.
  const int& ref_iters = _ref_iters;
  //! \brief Read-only view on flag to use target offload, if available.
  const bool& offload_flag = _offload_flag;
  
  /**
   * \brief Build a scattering geometry configuration structure.
@@ -345,6 +349,8 @@ protected:
  int _max_ref_iters;
  //! \brief Refinement flag.
  bool _use_refinement;
  //! \brief Offload flag.
  bool _use_offload;

public:
  //! \brief Flag for LU factorization inversion.
@@ -368,6 +374,8 @@ public:
  const int& max_ref_iters = _max_ref_iters;
  //! \brief Read-only view on refinement flag.
  const bool& use_refinement = _use_refinement;
  //! \brief Read-only view on offload flag.
  const bool& use_offload = _use_offload;
  //! \brief Pointer to a Logger instance.
  const Logger *logger;

+80 −74
Original line number Diff line number Diff line
@@ -14,7 +14,8 @@
   this program in the COPYING file. If not, see: <https://www.gnu.org/licenses/>.
 */

/*! \file clu_subs.h
/**
 * \file clu_subs.h
 *
 * \brief C++ porting of CLU functions and subroutines.
 *
@@ -31,7 +32,8 @@
#ifndef INCLUDE_CLU_SUBS_H_
#define INCLUDE_CLU_SUBS_H_

/*! \brief Compute the asymmetry-corrected scattering cross-section of a cluster.
/**
 * \brief Compute the asymmetry-corrected scattering cross-section of a cluster.
 *
 * This function computes the product between the geometrical asymmetry parameter and
 * the scattering cross-section, like `aps()`, but for a cluster of spheres. See Eq. (3.16)
@@ -50,7 +52,8 @@ void apc(
	 double sqk, double **gapr, dcomplex **gapp
);

/*! \brief Compute the asymmetry-corrected scattering cross-section under random average
/**
 * \brief Compute the asymmetry-corrected scattering cross-section under random average
 * conditions.
 *
 * This function computes the product between the geometrical asymmetry parameter and
@@ -69,7 +72,8 @@ void apcra(
	   double **gaprm, dcomplex **gappm
);

/*! \brief Complex inner product.
/**
 * \brief Complex inner product.
 *
 * This function performs the complex inner product. It is used by `lucin()`.
 *
@@ -83,7 +87,8 @@ void apcra(
 */
dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int istep);

/*! \brief C++ porting of CGEV. ANNOTATION: Get weight of T-matrix element.
/**
 * \brief C++ porting of CGEV. ANNOTATION: Get weight of T-matrix element.
 *
 * \param ipamo: `int`
 * \param mu: `int`
@@ -93,7 +98,20 @@ dcomplex cdtp(dcomplex z, dcomplex *vec_am, int i, int jf, int k, int nj, np_int
 */
double cgev(int ipamo, int mu, int l, int m);

/*! \brief Build the multi-centered M-matrix of the cluster.
/**
 * \brief Build the multi-centered M-matrix of the cluster.
 *
 * This function constructs the multi-centered M-matrix of the cluster, according
 * to Eq. (5.28) of Borghese, Denti & Saija (2007).
 *
 * \param[in,out] am: `complex double *`
 * \param[in] c1: `ParticleDescriptor *`
 */
void cms(dcomplex *am, ParticleDescriptor *c1);

#ifdef USE_TARGET_OFFLOAD
/**
 * \brief Build the multi-centered M-matrix of the cluster.
 *
 * This function constructs the multi-centered M-matrix of the cluster, according
 * to Eq. (5.28) of Borghese, Denti & Saija (2007).
@@ -101,9 +119,11 @@ double cgev(int ipamo, int mu, int l, int m);
 * \param am: `complex double **`
 * \param c1: `ParticleDescriptor *`
 */
void cms(dcomplex **am, ParticleDescriptor *c1);
void cms_flat(dcomplex *am, ParticleDescriptor *c1);
#endif // USE_TARGET_OFFLOAD

/*! \brief Compute orientation-averaged scattered field intensity.
/**
 * \brief Compute orientation-averaged scattered field intensity.
 *
 * This function computes the intensity of the scattered field for the cluster,
 * averaged on the orientations. It is invoked for IAVM=1 (ANNOTATION: geometry
@@ -115,46 +135,30 @@ void cms(dcomplex **am, ParticleDescriptor *c1);
 */
void crsm1(double vk, double exri, ParticleDescriptor *c1);

/*! \brief Compute the transfer vector from N2 to N1.
 *
 * This function computes the transfer vector going from N2 to N1, using either
 * Hankel, Bessel or Bessel from origin functions.
 *
 * \param ihi: `int`
 * \param ipamo: `int`
 * \param nbl: `int`
 * \param l1: `int`
 * \param m1: `int`
 * \param l2: `int`
 * \param m2: `int`
 * \param c1: `ParticleDescriptor *`
 * \param rac3j: `dcomplex *`
 */
dcomplex ghit_d(
	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2,
	      ParticleDescriptor *c1, double *rac3j
);

/*! \brief Compute the transfer vector from N2 to N1.
/**
 * \brief Compute the transfer vector from N2 to N1.
 *
 * This function computes the transfer vector going from N2 to N1, using either
 * Hankel, Bessel or Bessel from origin functions.
 *
 * \param ihi: `int`
 * \param ipamo: `int`
 * \param nbl: `int`
 * \param l1: `int`
 * \param m1: `int`
 * \param l2: `int`
 * \param m2: `int`
 * \param c1: `ParticleDescriptor *` Poiunter to a ParticleDescriptor instance.
 * \param[in] ihi: `int` Function mode.
 * \param[in] ipamo: `int`
 * \param[in] nbl: `int` Block identifier.
 * \param[in] l1: `int` First L quantum number.
 * \param[in] m1: `int` First M quantum number.
 * \param[in] l2: `int` Second L quantum number.
 * \param[in] m2: `int` Second M quantum number.
 * \param[in] c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance.
 * \param[in,out] rac3j: `double[]` J connection vector.
 * \return result: `dcomplex` Matrix element.
 */
dcomplex ghit(
	      int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2,
	      ParticleDescriptor *c1
  int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, ParticleDescriptor *c1,
  double rac3j[]
);

/*! \brief Compute Hankel funtion and Bessel functions.
/**
 * \brief Compute Hankel funtion and Bessel functions.
 *
 * This function constructs the Hankel function and the Bessel functions vectors. See
 * page 331 in Borghese, Denti & Saija (2007).
@@ -171,7 +175,8 @@ void hjv(
	 ParticleDescriptor *c1
);

/*! \brief Invert the multi-centered M-matrix.
/**
 * \brief Invert the multi-centered M-matrix.
 *
 * This function performs the inversion of the multi-centered M-matrix through
 * LU decomposition. See Eq. (5.29) in Borghese, Denti & Saija (2007).
@@ -183,7 +188,8 @@ void hjv(
 */
void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier);

/*! \brief Compute the average extinction cross-section.
/**
 * \brief Compute the average extinction cross-section.
 *
 * This funbction computes the average extinction cross-section starting
 * from the definition of the scattering amplitude. See Sec. 3.2.1 of Borghese,
@@ -197,7 +203,8 @@ void lucin(dcomplex **am, const np_int nddmst, np_int n, int &ier);
 */
void mextc(double vk, double exri, dcomplex **fsac, double **cextlr, double **cext);

/*! \brief Compute cross-sections and forward scattering amplitude for the cluster.
/**
 * \brief Compute cross-sections and forward scattering amplitude for the cluster.
 *
 * This function computes the scattering, absorption and extinction cross-sections
 * of the cluster, together with the Forward Scattering Amplitude.
@@ -209,7 +216,8 @@ void mextc(double vk, double exri, dcomplex **fsac, double **cextlr, double **ce
 */
void pcros(double vk, double exri, ParticleDescriptor *c1);

/*! \brief Compute orientation-averaged cross-sections and forward scattering amplitude.
/**
 * \brief Compute orientation-averaged cross-sections and forward scattering amplitude.
 *
 * This function computes the orientation-averaged scattering, absorption and extinction
 * cross-sections of the cluster, together with the averaged Forward Scattering Amplitude.
@@ -221,7 +229,8 @@ void pcros(double vk, double exri, ParticleDescriptor *c1);
 */
void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1);

/*! \brief Transform Cartesian quantities to spherical coordinates ones.
/**
 * \brief Transform Cartesian quantities to spherical coordinates ones.
 *
 * This function performs a conversion from the Cartesian coordinates system to the
 * spherical one. It is used by `sphar()`.
@@ -240,7 +249,8 @@ void polar(
	   double &cph, double &sph
);

/*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients towards J=0.
/**
 * \brief Compute the 3j symbol for Clebsch-Gordan coefficients towards J=0.
 *
 * This function calculates the 3j(J,J2,J3;0,0,0) symbol for the Clebsch-Gordan
 * coefficients. See Appendix a.3.1 in Borghese, Denti & Saija (2007).
@@ -251,33 +261,22 @@ void polar(
 */
void r3j000(int j2, int j3, double *rac3j);

/*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JJ transitions.
/**
 * \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JJ transitions.
 *
 * This function calculates the 3j(J,J2,J3;-M2-M3,M2,M3) symbol for the Clebsch-Gordan
 * coefficients. See Appendix a.3.1 in Borghese, Denti & Saija (2007).
 *
 * \param j2: `int`
 * \param j3: `int`
 * \param m2: `int`
 * \param m3: `int`
 * \param rac3j: `double *` Vector of 3j symbols.
 */
void r3jjr(int j2, int j3, int m2, int m3, double *rac3j);

/*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JJ transitions.
 *
 * This function calculates the 3j(J,J2,J3;-M2-M3,M2,M3) symbol for the Clebsch-Gordan
 * coefficients. See Appendix a.3.1 in Borghese, Denti & Saija (2007).
 *
 * \param j2: `int`
 * \param j3: `int`
 * \param m2: `int`
 * \param m3: `int`
 * \param rac3j: `double *`
 * \param[in] j2: `int` Value of J2.
 * \param[in] j3: `int` Value of J3.
 * \param[in] m2: `int` Value of M2.
 * \param[in] m3: `int` Value of M3.
 * \param[out] rac3j: `double[]` Vector of 3j symbols.
 */
void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j);
void r3jjr(int j2, int j3, int m2, int m3, double rac3j[]);

/*! \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JM transitions.
/**
 * \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JM transitions.
 *
 * This function calculates the 3j(J,J2,J3;M1,M,-M1-M) symbol for the Clebsch-Gordan
 * coefficients. See Appendix a.3.1 in Borghese, Denti & Saija (2007).
@@ -290,7 +289,8 @@ void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j);
 */
void r3jmr(int j1, int j2, int j3, int m1, double *rac3j);

/*! \brief Compute radiation torques on a particle in Cartesian coordinates.
/**
 * \brief Compute radiation torques on a particle in Cartesian coordinates.
 *
 * This function computes radiation torque on on a cluster of spheres as the
 * result of the difference between the extinction and the scattering
@@ -310,7 +310,8 @@ void raba(
	  dcomplex **tqcpe, double **tqcs, dcomplex **tqcps
);

/*! \brief Compute the radiation force Cartesian components.
/**
 * \brief Compute the radiation force Cartesian components.
 *
 * This function computes the Cartesian components of the radiation force
 * exerted on a particle. See Sec. 3.2.1 in Borghese, Denti & Saija (2007).
@@ -336,7 +337,8 @@ void rftr(
	  double &fy, double &fz
);

/*! \brief Compute Mie cross-sections for the sphere units in the cluster.
/**
 * \brief Compute Mie cross-sections for the sphere units in the cluster.
 *
 * This function computes the scattering, absorption and extinction cross-sections
 * for the spheres composing the cluster, in terms of Mie coefficients, together
@@ -349,7 +351,8 @@ void rftr(
 */
void scr0(double vk, double exri, ParticleDescriptor *c1);

/*! \brief Compute the scattering amplitude for a single sphere in an aggregate.
/**
 * \brief Compute the scattering amplitude for a single sphere in an aggregate.
 *
 * This function computes the scattering amplitude for single spheres constituting
 * an aggregate. See Sec. 4.2.1 in Borghese, Denti & Saija (2007).
@@ -364,7 +367,8 @@ void scr2(
	  double vk, double vkarg, double exri, double *duk, ParticleDescriptor *c1
);

/*! \brief Transform sphere Cartesian coordinates to spherical coordinates.
/**
 * \brief Transform sphere Cartesian coordinates to spherical coordinates.
 *
 * This function transforms the Cartesian coordinates of the spheres in an aggregate
 * to radial coordinates, then it calls `sphar()` to calculate the vector of spherical
@@ -375,7 +379,8 @@ void scr2(
 */
void str(double **rcf, ParticleDescriptor *c1);

/*! \brief Compute radiation torques on particles on a k-vector oriented system.
/**
 * \brief Compute radiation torques on particles on a k-vector oriented system.
 *
 * This function computes the radiation torques resulting from the difference
 * between absorption and scattering contributions, like `rabas()`, but for a
@@ -399,7 +404,8 @@ void tqr(
	 double &ten, double &tek, double &tsp, double &tsn, double &tsk
);

/*! \brief Calculate the single-centered inversion of the M-matrix.
/**
 * \brief Calculate the single-centered inversion of the M-matrix.
 *
 * This function computes the single-centered inverted M-matrix appearing in Eq. (5.28)
 * of Borghese, Denti & Saija (2007).
+156 −47
Original line number Diff line number Diff line
@@ -21,19 +21,163 @@
 *
 */

#include <string>

#ifndef INCLUDE_MAGMA_CALLS_H_
#define INCLUDE_MAGMA_CALLS_H_

#define MAX_GPU_STACK_DOUBLE 128

#ifdef USE_TARGET_OFFLOAD
#pragma omp begin declare target device_type(any)
/**
 * \brief C++ porting of CGEV. Get weight of T-matrix element (MAGMA GPU version).
 *
 * \param[in] ipamo: `int`
 * \param[in] mu: `int`
 * \param[in] l: `int`
 * \param[in] m: `int`
 * \return result: `double`
 */
double magma_cgev(int ipamo, int mu, int l, int m);
#pragma omp end declare target

/**
 * \brief Initialize a MAGMA compliant T-matrix leaving it on the GPU.
 *
 * \param[in,out] vec_am: `magmaDoubleComplex *` Vector form of the matrix.
 * \param[in] c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance.
 * \param[in] rxx: `double *` Vector of sphere X coordinates.
 * \param[in] ryy: `double *` Vector of sphere Y coordinates.
 * \param[in] rzz: `double *` Vector of sphere Z coordinates.
 * \param[in] ind3j: `int *` Vector form of the 3J index look-up table.
 * \param[in] v3j0: `double *`
 * \param[in] vh: `magmaDoubleComplex *`
 * \param[in] vyhj: `magmaDoubleComplex *`
 * \param[in] vj0: `magmaDoubleComplex *`
 * \param[in] vyj0: `magmaDoubleComplex *`
 * \param[in] rmi: `magmaDoubleComplex *` Vector of Mie magnetic coefficients.
 * \param[in] rei: `magmaDoubleComplex *` Vector of Mie electric coefficients.
 * \param[in] device_id: `int` ID of the device to build the matrix on.
 * \return result: `magma_int_t` A return code (MAGMA_SUCCESS, if everything is fine).
 */
magma_int_t magma_cms(
  magmaDoubleComplex *vec_am, ParticleDescriptor *c1, double *rxx, double *ryy, double *rzz,
  int *ind3j, double *v3j0, magmaDoubleComplex *vh, magmaDoubleComplex *vyhj,
  magmaDoubleComplex *vj0, magmaDoubleComplex *vyj0, magmaDoubleComplex *rmi,
  magmaDoubleComplex* rei, int device_id
);

/**
 * \brief Compute the transfer vector from N2 to N1.
 *
 * This function computes the transfer vector going from N2 to N1, using either
 * Hankel, Bessel or Bessel from origin functions.
 *
 * \tparam IHI: `int` Function mode.
 * \param[in] ipamo: `int`
 * \param[in] nbl: `int` Block identifier.
 * \param[in] l1: `int` First L quantum number.
 * \param[in] m1: `int` First M quantum number.
 * \param[in] l2: `int` Second L quantum number.
 * \param[in] m2: `int` Second M quantum number.
 * \param[in] rxx: `double *` X coordinates of the spheres.
 * \param[in] ryy: `double *` Y coordinates of the spheres.
 * \param[in] rzz: `double *` Z coordinates of the spheres.
 * \param[in] ind3j: `int *` J vector index look-up table.
 * \param[in] v3j0: `double *` J0 vectors.
 * \param[in] vh: `magmaDoubleComplex *`
 * \param[in] vyhj: `magmaDoubleComplex *`
 * \param[in] vj0: `magmaDoubleComplex *`
 * \param[in] vyj0: `magmaDoubleComplex *`
 * \param[in] vj: `magmaDoubleComplex`
 * \param[in] li: `int` Maximum internal expansion order.
 * \param[in] le: `int` Maximum external expansion order.
 * \param[in,out] rac3j: `double[]` J connection vector.
 * \return result: `magmaDoubleComplex` Matrix element.
 */
#pragma omp begin declare target device_type(any)
template<int IHI> magmaDoubleComplex magma_ghit(
  int ipamo, int nbl, int l1, int m1, int l2, int m2,
  double *rxx, double *ryy, double *rzz, int *ind3j,
  double *v3j0, magmaDoubleComplex *vh, magmaDoubleComplex *vyhj,
  magmaDoubleComplex *vj0, magmaDoubleComplex *vyj0,
  magmaDoubleComplex vj, int li, int le, double rac3j[]
);
#pragma omp end declare target

#pragma omp begin declare target device_type(any)
/**
 * \brief Compute the 3j symbol for Clebsch-Gordan coefficients for JJ transitions.
 *
 * This function calculates the 3j(J,J2,J3;-M2-M3,M2,M3) symbol for the Clebsch-Gordan
 * coefficients. See Appendix a.3.1 in Borghese, Denti & Saija (2007). This function
 * is a specialized version to work on GPUs with MAGMA.
 *
 * \param[in] j2: `int` Value of J2.
 * \param[in] j3: `int` Value of J3.
 * \param[in] m2: `int` Value of M2.
 * \param[in] m3: `int` Value of M3.
 * \param[out] rac3j: `double[]` Vector of 3j symbols.
 */
void magma_r3jjr(int j2, int j3, int m2, int m3, double rac3j[]);
#pragma omp end declare target

/**
 * \brief Invert a pre-existing GPU complex matrix with double precision elements.
 *
 * \param[in,out] mat: Matrix of complex. The matrix to be inverted.
 * \param[in] n: `magma_int_t` The number of rows and columns of the [n x n] matrix.
 * \param[out] jer: `int &` Reference to an integer return flag.
 * \param[in] queue: `magma_queue_t` The MAGMA device communication queue.
 * \param[in] device_id: `int` ID of the device for matrix inversion offloading (optional, default 0).
 * \param[in] rs: `const RuntimeSettings &` Runtime settings instance (optional).
 */
void magma_zinvert_resident(
  magmaDoubleComplex *mat, magma_int_t n, int &jer, magma_queue_t queue, int device_id=0,
  const RuntimeSettings& rs=RuntimeSettings()
);

/**
 * \brief Compute the monocentered T-matrix
 *
 * \param[in] vec_am: `magmaDoubleComplex *` Vector form of the multi-centered matrix.
 * \param[in] c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance.
 * \param[in] rxx: `double *` Vector of sphere X coordinates.
 * \param[in] ryy: `double *` Vector of sphere Y coordinates.
 * \param[in] rzz: `double *` Vector of sphere Z coordinates.
 * \param[in] ind3j: `int *` Vector form of the 3J index look-up table.
 * \param[in] v3j0: `double *`
 * \param[in] vh: `magmaDoubleComplex *`
 * \param[in] vyhj: `magmaDoubleComplex *`
 * \param[in] vj0: `magmaDoubleComplex *`
 * \param[in] vyj0: `magmaDoubleComplex *`
 * \param[in] sam_v: `magmaDoubleComplex *` Vector form of sums from vec_am.
 * \param[in] gis_v: `magmaDoubleComplex *`
 * \param[in] gls_v: `magmaDoubleComplex *`
 * \param[in] device_id: `int` ID of the device to build the matrix on.
 * \return result: `magma_int_t` A return code (MAGMA_SUCCESS, if everything is fine).
 */
magma_int_t magma_ztm(
  magmaDoubleComplex *vec_am, ParticleDescriptor *c1, double *rxx, double *ryy,
  double *rzz, int *ind3j, double *v3j0, magmaDoubleComplex *vh, magmaDoubleComplex *vyhj,
  magmaDoubleComplex *vj0, magmaDoubleComplex *vyj0, magmaDoubleComplex *sam_v,
  magmaDoubleComplex *gis_v, magmaDoubleComplex *gls_v, magmaDoubleComplex *vec_am0m,
  int device_id
);
#endif // USE_TARGET_OFFLOAD

/**
 * \brief Invert a complex matrix with double precision elements.
 *
 * \param mat: Matrix of complex. The matrix to be inverted.
 * \param n: `np_int` The number of rows and columns of the [n x n] matrix.
 * \param jer: `int &` Reference to an integer return flag.
 * \param device_id: `int` ID of the device for matrix inversion offloading.
 * \param rs: `const RuntimeSettings &` Runtime settings instance.
 * This function is a first implementation of GPU use. The data transfer
 * steps are handled internally, via standard MAGMA calls, so that the matrix
 * to be inverted is copied from the host memory to the GPU, inverted, possibly
 * refined and finally taken back to the host, leaving the GPU free.
 *
 * \param[in,out] mat: Matrix of complex. The matrix to be inverted.
 * \param[in] n: `np_int` The number of rows and columns of the [n x n] matrix.
 * \param[out] jer: `int &` Reference to an integer return flag.
 * \param[in] device_id: `int` ID of the device for matrix inversion offloading (optional, default 0).
 * \param[in] rs: `const RuntimeSettings &` Runtime settings instance (optional).
 */
void magma_zinvert(
  dcomplex **mat, np_int n, int &jer, int device_id=0,
@@ -57,11 +201,11 @@ void magma_zinvert(
 * the calling function will be responsible of getting the refined matrix from
 * device back to the host.
 *
 * \param rs: `const RuntimeSettings &` Runtime settings instance. [IN]
 * \param a: `magmaDoubleComplex *` Pointer to the first element of the non-inverted matrix on host. [IN]
 * \param m: `const magma_int_t` Number of rows / columns in a. [IN]
 * \param d_a: `magmaDoubleComplex *` Pointer to the matrix on the GPU. [IN/OUT]
 * \param queue: `magma_queue_t` GPU communication queue. [IN]
 * \param[in] rs: `const RuntimeSettings &` Runtime settings instance.
 * \param[in] a: `magmaDoubleComplex *` Pointer to the first element of the non-inverted matrix on host.
 * \param[in] m: `const magma_int_t` Number of rows / columns in a.
 * \param[in,out] d_a: `magmaDoubleComplex *` Pointer to the matrix on the GPU.
 * \param[in] queue: `magma_queue_t` GPU communication queue.
 * \return err: `magma_int_t` An error code (MAGMA_SUCCESS, if everything was fine).
 */
magma_int_t magma_newton(
@@ -69,39 +213,4 @@ magma_int_t magma_newton(
  magmaDoubleComplex* d_a, magma_queue_t queue
);

/* \brief Invert a complex matrix with double precision elements, applying iterative refinement of the solution
 *
 * call magma_zinvert1() to perform the first matrix inversion, then magma_refine() to do the refinement (only if maxrefiters is >0)
 *
 * \param mat: Matrix of complex. The matrix to be inverted.
 * \param n: `np_int` The number of rows and columns of the [n x n] matrix.
 * \param jer: `int &` Reference to an integer return flag.
 * \param maxrefiters: `int` Maximum number of refinement iterations to apply.
 * \param accuracygoal: `double &` Accuracy to achieve in iterative refinement, defined as the module of the maximum difference between the identity matrix and the matrix product of the (approximate) inverse times the original matrix. On return, it contains the actually achieved accuracy.
 * \param refinemode: `int` Flag to control the refinement mode.
 * \param device_id: `int` ID of the device for matrix inversion offloading.
 * \param output_path: `const string &` Path where the output needs to be placed.
 * \param jxi488: `int` Index of the current wavelength calculation.
 */
//void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxrefiters, double &accuracygoal, int refinemode, int device_id, const string& output_path, int jxi488);

/* \brief Apply iterative refinement of the solution of a matrix inversion.
 *
 * Iteratively compute and apply a correction to the inverse `inva` of the complex
 * matrix `aorig`, for a maximum number of `maxiters` times, or until achieving a
 * maximum residual better than `accuracygoal`.
 *
 * \param aorig: pointer to the first element of the matrix of complex to be inverted.
 * \param inva: pointer to the first element of inverse.
 * \param n: `np_int` The number of rows and columns of the [n x n] matrices.
 * \param jer: `int &` Reference to an integer return flag.
 * \param maxrefiters: `int` Maximum number of refinement iterations to apply.
 * \param accuracygoal: `double` Accuracy to achieve in iterative refinement, defined as the module of the maximum difference between the identity matrix and the matrix product of the (approximate) inverse times the original matrix. On return, it contains the actually achieved accuracy.
 * \param refinemode: `int` Flag for refinement mode selection.
 * \param device_id: `int` ID of the device for matrix inversion offloading.
 * \param output_path: `const string &` Path where the output needs to be placed.
 * \param jxi488: `int` Index of the current wavelength calculation.
 */
// void magma_refine(dcomplex *aorig, dcomplex *inva, np_int n, int &jer, int &maxrefiters, double &accuracygoal, int refinemode, int device_id, const std::string& output_path, int jxi488);

#endif
Loading