Commit 4d00b49b authored by Giacomo Mulas's avatar Giacomo Mulas
Browse files

Merge branch 'containers-m8' into 'master'

Containers m8

See merge request giacomo.mulas/np_tmcode!45
parents 8bccc887 69c6ccd5
Loading
Loading
Loading
Loading
+20 −2
Original line number Diff line number Diff line
@@ -22,6 +22,14 @@ RUN DEBIAN_FRONTEND=noninteractive apt -y install intel-oneapi-compiler-fortran
RUN DEBIAN_FRONTEND=noninteractive apt -y install liblapacke-dev liblapacke64-dev libopenblas-dev libopenblas-openmp-dev libopenblas64-dev libopenblas64-openmp-dev
# install MPI stack
RUN DEBIAN_FRONTEND=noninteractive apt -y install mpi-default-dev mpi-default-bin
# install nvidia stuff
COPY --chown=root:root containers/docker/dockerstuff/debian-nonfree/debian.sources /etc/apt/sources.list.d/
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive apt -y install libquadmath0 libcublaslt11 libgcc-s1 libgomp1 libcudart11.0 libcusparse11 libcublas11 nvidia-cuda-dev
# install magma
COPY --chown=root:root containers/docker/dockerstuff/magma-compiled/usr/lib/* /usr/lib/x86_64-linux-gnu/
COPY --chown=root:root containers/docker/dockerstuff/magma-compiled/usr/include/* /usr/include/
COPY --chown=root:root containers/docker/dockerstuff/magma-compiled/usr/lib/pkgconfig/* /usr/lib/x86_64-linux-gnu/pkgconfig/
# install packages needed to run python scripts for checks
RUN DEBIAN_FRONTEND=noninteractive apt -y install python3 python-is-python3 python3-regex
# install packages needed to run doxygen to create html docs
@@ -46,7 +54,13 @@ ADD src /root/np-tmcode/src
ADD doc /root/np-tmcode/doc
ADD build /root/np-tmcode/build
ADD test_data /root/np-tmcode/test_data
RUN cd np-tmcode/src && make wipe && make -j && cd ../doc/src && doxygen config.dox && cd ../build/latex && make -j
RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_magma_mpi
RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_MAGMA=1 USE_OPENMP=1 CXX=g++ FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_magma_serial
RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_lapack_mpi && cd ../build/cluster && ln -s np_cluster_lapack_mpi np_cluster
RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_LAPACK=1 USE_OPENMP=1 CXX=g++ FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_lapack_serial
RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 USE_MPI=1 CXX=mpicxx FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_legacy_mpi
RUN cd np-tmcode/src && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 CXX=g++ FC=gfortran make wipe && BUILDDIR=../../build BUILDDIR_NPTM=../../build/libnptm LIBNPTM=../../build/libnptm/libnptm.a USE_ILP64=1 USE_OPENMP=1 CXX=g++ FC=gfortran make -j && mv ../build/cluster/np_cluster ../build/cluster/np_cluster_legacy_serial
RUN cd np-tmcode/doc/src && doxygen config.dox && cd ../build/latex && make -j


# create the container on which the np-tmcode is installed, restarting from
@@ -55,8 +69,12 @@ FROM debian:bookworm-slim AS np-tmcode-run-minimal
WORKDIR /root
# install only the runtime libraries strictly needed to run the executables
# and the python check scripts
RUN DEBIAN_FRONTEND=noninteractive apt update && DEBIAN_FRONTEND=noninteractive apt upgrade && DEBIAN_FRONTEND=noninteractive apt -y install libgfortran5 libgcc-s1 libhdf5-103-1 libstdc++6 libssl3 libcurl4 libsz2 zlib1g libnghttp2-14 libidn2-0 librtmp1 libssh2-1 libpsl5 libgssapi-krb5-2 libldap-2.5-0 libzstd1 libbrotli1 libaec0 libunistring2 libgmp10 libkrb5-3 libk5crypto3 libcom-err2 libkrb5support0 libsasl2-2 libp11-kit0 libtasn1-6 libkeyutils1 libffi8 liblapacke64 libopenblas64-0-openmp python3 python-is-python3 python3-regex hdf5-tools mpi-default-bin && rm -rf /var/lib/apt/lists/*
COPY --chown=root:root containers/docker/dockerstuff/debian-nonfree/debian.sources /etc/apt/sources.list.d/
RUN apt update
# refresh the package index, apply pending upgrades and install the runtime
# libraries in a single layer; "upgrade" needs -y too, otherwise apt stops at
# its confirmation prompt and aborts the build whenever upgrades are pending
RUN DEBIAN_FRONTEND=noninteractive apt update && DEBIAN_FRONTEND=noninteractive apt -y upgrade && DEBIAN_FRONTEND=noninteractive apt -y install libgfortran5 libgcc-s1 libhdf5-103-1 libstdc++6 libssl3 libcurl4 libsz2 zlib1g libnghttp2-14 libidn2-0 librtmp1 libssh2-1 libpsl5 libgssapi-krb5-2 libldap-2.5-0 libzstd1 libbrotli1 libaec0 libunistring2 libgmp10 libkrb5-3 libk5crypto3 libcom-err2 libkrb5support0 libsasl2-2 libp11-kit0 libtasn1-6 libkeyutils1 libffi8 liblapacke64 libopenblas64-0-openmp python3 python-is-python3 python3-regex hdf5-tools mpi-default-bin libquadmath0 libcublaslt11 libgcc-s1 libgomp1 libcudart11.0 libcusparse11 libcublas11 && rm -rf /var/lib/apt/lists/*
COPY --from=np-tmcode-run-dev /root /root
COPY --from=np-tmcode-run-dev /usr/lib/x86_64-linux-gnu/libmagma.so /usr/lib/x86_64-linux-gnu/libmagma.so
COPY --from=np-tmcode-run-dev /usr/lib/x86_64-linux-gnu/libmagma.so.2 /usr/lib/x86_64-linux-gnu/libmagma.so.2
# remove everything which is not needed to run the codes
RUN cd /root/np-tmcode && find build -name "*.o" -exec rm -v \{\} \; && find build -name "*.gcno" -exec rm -v \{\} \; && cd src && rm -rvf cluster libnptm trapping include sphere Makefile make.inc README.md && cd .. && rm -rvf containers && cd doc && rm -rvf src && cd build/latex && rm -rvf *.tex *.out *.sty *.ind *.log *.toc *.ilg *.idx *.aux *.eps Makefile class*.pdf
# move the installed software to /usr/local
+14 −0
Original line number Diff line number Diff line
Types: deb
# http://snapshot.debian.org/archive/debian/20240612T000000Z
URIs: http://deb.debian.org/debian
Suites: bookworm bookworm-updates
Components: main non-free contrib non-free-firmware
Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg

Types: deb
# http://snapshot.debian.org/archive/debian-security/20240612T000000Z
URIs: http://deb.debian.org/debian-security
Suites: bookworm-security
Components: main non-free contrib non-free-firmware
Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
+39 −0
Original line number Diff line number Diff line
/*
    -- MAGMA (version 2.7.2) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date August 2023
*/

#ifndef MAGMA_H
#define MAGMA_H

#ifdef MAGMA_NO_V1
#error "Since MAGMA_NO_V1 is defined, magma.h is invalid; use magma_v2.h"
#endif

// =============================================================================
// MAGMA configuration
#include "magma_config.h"


// magma v1 includes cublas.h by default, unless cublas_v2.h has already been included
#ifndef CUBLAS_V2_H_
#if defined(MAGMA_HAVE_CUDA)
#include <cublas.h>
#endif
#endif

// Include the MAGMA v2 and v1 APIs,
// then map names to the v1 API (e.g., magma_zgemm => magma_zgemm_v1).
// Some functions (like setmatrix_async) are the same in v1 and v2,
// so are provided by the v2 API.
#include "magma_v2.h"
#include "magmablas_v1.h"
#include "magmablas_v1_map.h"

#undef  MAGMA_API
#define MAGMA_API 1

#endif // MAGMA_H
+106 KiB

File added.

No diff preview for this file type.

+407 −0
Original line number Diff line number Diff line
/*
    -- MAGMA (version 2.7.2) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date August 2023
*/

#ifndef MAGMA_AUXILIARY_H
#define MAGMA_AUXILIARY_H

#include "magma_types.h"

#include <math.h>  // sqrtf

#ifdef __cplusplus
extern "C" {
#endif


// =============================================================================
// initialization

magma_int_t magma_init( void );
magma_int_t magma_finalize( void );

#ifdef MAGMA_HAVE_OPENCL
magma_int_t magma_init_opencl(
    cl_platform_id platform,
    cl_context context,
    magma_int_t setup_clBlas );

magma_int_t magma_finalize_opencl(
    magma_int_t finalize_clBlas );
#endif


// =============================================================================
// version information

void magma_version( magma_int_t* major, magma_int_t* minor, magma_int_t* micro );
// Presumably prints MAGMA build/runtime environment details - confirm against
// the implementation. Takes no arguments; the explicit `void` gives C callers
// a real prototype (an empty `()` leaves the parameters unspecified before C23).
void magma_print_environment( void );


// =============================================================================
// timing

real_Double_t magma_wtime( void );
real_Double_t magma_sync_wtime( magma_queue_t queue );


// =============================================================================
// misc. functions

// magma GPU-complex PCIe connection
magma_int_t magma_buildconnection_mgpu(
    magma_int_t gnode[MagmaMaxGPUs+2][MagmaMaxGPUs+2],
    magma_int_t *ncmplx,
    magma_int_t ngpu );

void magma_indices_1D_bcyclic(
    magma_int_t nb, magma_int_t ngpu, magma_int_t dev,
    magma_int_t j0, magma_int_t j1,
    magma_int_t* dj0, magma_int_t* dj1 );

void magma_swp2pswp(
    magma_trans_t trans, magma_int_t n,
    magma_int_t *ipiv,
    magma_int_t *newipiv );


// =============================================================================
// get NB blocksize

// Takes no arguments; the explicit `void` gives C callers a real prototype
// (an empty `()` leaves the parameters unspecified before C23).
magma_int_t magma_get_smlsize_divideconquer( void );


// =============================================================================
// memory allocation

magma_int_t
magma_malloc( magma_ptr *ptr_ptr, size_t bytes );

magma_int_t
magma_malloc_cpu( void **ptr_ptr, size_t bytes );

magma_int_t
magma_malloc_pinned( void **ptr_ptr, size_t bytes );

magma_int_t
magma_free_cpu( void *ptr );

// Call-site wrapper: expands to magma_free_internal() with the caller's
// function name (__func__), file (__FILE__) and line (__LINE__) appended,
// so user code only passes the pointer.
#define magma_free( ptr ) \
        magma_free_internal( ptr, __func__, __FILE__, __LINE__ )

// Call-site wrapper: expands to magma_free_pinned_internal() with the caller's
// function name, file and line appended.
#define magma_free_pinned( ptr ) \
        magma_free_pinned_internal( ptr, __func__, __FILE__, __LINE__ )

magma_int_t
magma_free_internal(
    magma_ptr ptr,
    const char* func, const char* file, int line );

magma_int_t
magma_free_pinned_internal(
    void *ptr,
    const char* func, const char* file, int line );

// returns memory info (basically a wrapper around cudaMemGetInfo)
magma_int_t
magma_mem_info(size_t* freeMem, size_t* totalMem);

// wrapper around cudaMemset
magma_int_t
magma_memset(void * ptr, int value, size_t count);

// wrapper around cudaMemsetAsync
magma_int_t
magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue);

// type-safe convenience functions to avoid using (void**) cast and sizeof(...)
// here n is the number of elements (floats, doubles, etc.) not the number of bytes.
/******************************************************************************/
/// @addtogroup magma_malloc
/// imalloc, smalloc, etc.
/// @{

/// Typed front end to magma_malloc() for magma_int_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_int_t) bytes are requested.
static inline magma_int_t magma_imalloc( magmaInt_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_int_t);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc() for magma_index_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_index_t) bytes are requested.
static inline magma_int_t magma_index_malloc( magmaIndex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_index_t);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc() for magma_uindex_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_uindex_t) bytes are requested.
static inline magma_int_t magma_uindex_malloc( magmaUIndex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_uindex_t);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc() for float arrays.
/// n counts elements, not bytes: n*sizeof(float) bytes are requested.
static inline magma_int_t magma_smalloc( magmaFloat_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(float);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc() for double arrays.
/// n counts elements, not bytes: n*sizeof(double) bytes are requested.
static inline magma_int_t magma_dmalloc( magmaDouble_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(double);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc() for magmaFloatComplex arrays.
/// n counts elements, not bytes: n*sizeof(magmaFloatComplex) bytes are requested.
static inline magma_int_t magma_cmalloc( magmaFloatComplex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaFloatComplex);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc() for magmaDoubleComplex arrays.
/// n counts elements, not bytes: n*sizeof(magmaDoubleComplex) bytes are requested.
static inline magma_int_t magma_zmalloc( magmaDoubleComplex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaDoubleComplex);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// @}


/******************************************************************************/
/// @addtogroup magma_malloc_cpu
/// imalloc_cpu, smalloc_cpu, etc.
/// @{

/// Typed front end to magma_malloc_cpu() for magma_int_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_int_t) bytes are requested.
static inline magma_int_t magma_imalloc_cpu( magma_int_t **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_int_t);
    return magma_malloc_cpu( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_cpu() for magma_index_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_index_t) bytes are requested.
static inline magma_int_t magma_index_malloc_cpu( magma_index_t **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_index_t);
    return magma_malloc_cpu( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_cpu() for magma_uindex_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_uindex_t) bytes are requested.
static inline magma_int_t magma_uindex_malloc_cpu( magma_uindex_t **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_uindex_t);
    return magma_malloc_cpu( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_cpu() for float arrays.
/// n counts elements, not bytes: n*sizeof(float) bytes are requested.
static inline magma_int_t magma_smalloc_cpu( float **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(float);
    return magma_malloc_cpu( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_cpu() for double arrays.
/// n counts elements, not bytes: n*sizeof(double) bytes are requested.
static inline magma_int_t magma_dmalloc_cpu( double **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(double);
    return magma_malloc_cpu( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_cpu() for magmaFloatComplex arrays.
/// n counts elements, not bytes: n*sizeof(magmaFloatComplex) bytes are requested.
static inline magma_int_t magma_cmalloc_cpu( magmaFloatComplex **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaFloatComplex);
    return magma_malloc_cpu( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_cpu() for magmaDoubleComplex arrays.
/// n counts elements, not bytes: n*sizeof(magmaDoubleComplex) bytes are requested.
static inline magma_int_t magma_zmalloc_cpu( magmaDoubleComplex **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaDoubleComplex);
    return magma_malloc_cpu( (void**) ptr_ptr, bytes );
}

/// @}


/******************************************************************************/
/// @addtogroup magma_malloc_pinned
/// imalloc_pinned, smalloc_pinned, etc.
/// @{

/// Typed front end to magma_malloc_pinned() for magma_int_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_int_t) bytes are requested.
static inline magma_int_t magma_imalloc_pinned( magma_int_t **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_int_t);
    return magma_malloc_pinned( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_pinned() for magma_index_t arrays.
/// n counts elements, not bytes: n*sizeof(magma_index_t) bytes are requested.
static inline magma_int_t magma_index_malloc_pinned( magma_index_t **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_index_t);
    return magma_malloc_pinned( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_pinned() for float arrays.
/// n counts elements, not bytes: n*sizeof(float) bytes are requested.
static inline magma_int_t magma_smalloc_pinned( float **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(float);
    return magma_malloc_pinned( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_pinned() for double arrays.
/// n counts elements, not bytes: n*sizeof(double) bytes are requested.
static inline magma_int_t magma_dmalloc_pinned( double **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(double);
    return magma_malloc_pinned( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_pinned() for magmaFloatComplex arrays.
/// n counts elements, not bytes: n*sizeof(magmaFloatComplex) bytes are requested.
static inline magma_int_t magma_cmalloc_pinned( magmaFloatComplex **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaFloatComplex);
    return magma_malloc_pinned( (void**) ptr_ptr, bytes );
}

/// Typed front end to magma_malloc_pinned() for magmaDoubleComplex arrays.
/// n counts elements, not bytes: n*sizeof(magmaDoubleComplex) bytes are requested.
static inline magma_int_t magma_zmalloc_pinned( magmaDoubleComplex **ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaDoubleComplex);
    return magma_malloc_pinned( (void**) ptr_ptr, bytes );
}

/// @}

// CUDA MAGMA only
magma_int_t magma_is_devptr( const void* ptr );


// =============================================================================
// device support

magma_int_t
magma_num_gpus( void );
/* todo: num_accelerators */
/* todo: total accelerators? available accelerators? i.e., number to use vs. number available. */

// CUDA MAGMA only
// Takes no arguments; the explicit `void` gives C callers a real prototype
// (an empty `()` leaves the parameters unspecified before C23).
magma_int_t
magma_getdevice_arch( void );
/* magma_int_t magma_getdevice_arch( magma_int_t dev or queue );   todo: new */

void
magma_getdevices(
    magma_device_t* devices,
    magma_int_t     size,
    magma_int_t*    num_dev );

void
magma_getdevice( magma_device_t* dev );

void
magma_setdevice( magma_device_t dev );

size_t
magma_mem_size( magma_queue_t queue );

// Takes no arguments; the explicit `void` gives C callers a real prototype
// (an empty `()` leaves the parameters unspecified before C23).
magma_int_t
magma_getdevice_multiprocessor_count( void );

// Takes no arguments; the explicit `void` gives C callers a real prototype
// (an empty `()` leaves the parameters unspecified before C23).
size_t
magma_getdevice_shmem_block( void );

// Takes no arguments; the explicit `void` gives C callers a real prototype
// (an empty `()` leaves the parameters unspecified before C23).
size_t
magma_getdevice_shmem_multiprocessor( void );

// =============================================================================
// queue support
// new magma_queue_create adds device
// Each macro below forwards its arguments unchanged to the matching
// *_internal function and appends the call site's __func__, __FILE__ and
// __LINE__, so user code never passes the location arguments itself.

// Forwards (device, queue_ptr) to magma_queue_create_internal().
#define magma_queue_create(          device, queue_ptr ) \
        magma_queue_create_internal( device, queue_ptr, __func__, __FILE__, __LINE__ )

// Forwards an existing CUDA stream plus cuBLAS/cuSPARSE handles to
// magma_queue_create_from_cuda_internal(). The macro is defined
// unconditionally; the function it names is only declared under MAGMA_HAVE_CUDA.
#define magma_queue_create_from_cuda(          device, cuda_stream, cublas_handle, cusparse_handle, queue_ptr ) \
        magma_queue_create_from_cuda_internal( device, cuda_stream, cublas_handle, cusparse_handle, queue_ptr, __func__, __FILE__, __LINE__ )

// HIP counterpart; the function it names is only declared under MAGMA_HAVE_HIP.
#define magma_queue_create_from_hip(           device, hip_stream, hipblas_handle, hipsparse_handle, queue_ptr ) \
        magma_queue_create_from_hip_internal( device, hip_stream, hipblas_handle, hipsparse_handle, queue_ptr, __func__, __FILE__, __LINE__ )

// OpenCL counterpart; expands to a six-argument call. The function it names
// is only declared under MAGMA_HAVE_OPENCL.
#define magma_queue_create_from_opencl(          device, cl_queue, queue_ptr ) \
        magma_queue_create_from_opencl_internal( device, cl_queue, queue_ptr, __func__, __FILE__, __LINE__ )

// Forwards the queue to magma_queue_destroy_internal().
#define magma_queue_destroy( queue ) \
        magma_queue_destroy_internal( queue, __func__, __FILE__, __LINE__ )

// Forwards the queue to magma_queue_sync_internal().
#define magma_queue_sync( queue ) \
        magma_queue_sync_internal( queue, __func__, __FILE__, __LINE__ )

void
magma_queue_create_internal(
    magma_device_t device,
    magma_queue_t* queue_ptr,
    const char* func, const char* file, int line );

#ifdef MAGMA_HAVE_CUDA
void
magma_queue_create_from_cuda_internal(
    magma_device_t   device,
    cudaStream_t     stream,
    cublasHandle_t   cublas,
    cusparseHandle_t cusparse,
    magma_queue_t*   queue_ptr,
    const char* func, const char* file, int line );
#endif


#ifdef MAGMA_HAVE_HIP
void
magma_queue_create_from_hip_internal(
    magma_device_t    device,
    hipStream_t       stream,
    hipblasHandle_t   hipblas,
    hipsparseHandle_t hipsparse,
    magma_queue_t*    queue_ptr,
    const char* func, const char* file, int line );
#endif


#ifdef MAGMA_HAVE_OPENCL
// NOTE(review): the magma_queue_create_from_opencl() macro in this header
// expands to a call with six arguments (device, cl_queue, queue_ptr, func,
// file, line), but this prototype declares only five parameters and has no
// queue_ptr. Presumably a `magma_queue_t* queue_ptr` parameter is missing
// here - confirm against the OpenCL implementation before using the macro.
magma_int_t
magma_queue_create_from_opencl_internal(
    magma_device_t   device,
    cl_command_queue cl_queue,
    const char* func, const char* file, int line );
#endif

void
magma_queue_destroy_internal(
    magma_queue_t queue,
    const char* func, const char* file, int line );

void
magma_queue_sync_internal(
    magma_queue_t queue,
    const char* func, const char* file, int line );

magma_int_t
magma_queue_get_device( magma_queue_t queue );


// =============================================================================
// event support

void
magma_event_create( magma_event_t* event_ptr );

void
magma_event_create_untimed( magma_event_t* event_ptr );

void
magma_event_destroy( magma_event_t event );

void
magma_event_record( magma_event_t event, magma_queue_t queue );

void
magma_event_query( magma_event_t event );

void
magma_event_sync( magma_event_t event );

void
magma_queue_wait_event( magma_queue_t queue, magma_event_t event );


// =============================================================================
// error handler

void magma_xerbla( const char *name, magma_int_t info );

const char* magma_strerror( magma_int_t error );


// =============================================================================
// string functions

size_t magma_strlcpy( char *dst, const char *src, size_t size );


// =============================================================================
// integer functions

/// Ceiling division: for integers x >= 0, y > 0, returns ceil( x/y ).
/// x == 0 yields 0.
/// @ingroup magma_ceildiv
__host__ __device__
static inline magma_int_t magma_ceildiv( magma_int_t x, magma_int_t y )
{
    // Adding y-1 first makes the truncating division round any remainder up.
    const magma_int_t biased = x + y - 1;
    return biased / y;
}

/// Rounds x up to the next multiple of y, for integers x >= 0, y > 0;
/// i.e. ceil(x/y)*y. x == 0 yields 0.
/// y does not have to be a power of 2.
/// @ingroup magma_ceildiv
__host__ __device__
static inline magma_int_t magma_roundup( magma_int_t x, magma_int_t y )
{
    // Number of y-sized chunks needed to cover x.
    const magma_int_t chunks = magma_ceildiv( x, y );
    return chunks * y;
}


// =============================================================================
// scalar functions

// real and complex square root
// sqrt alone cannot be caught by the generation script because of tsqrt

/// @return Square root of x. @ingroup magma_sqrt
static inline float  magma_ssqrt( float  x ) { return sqrtf( x ); }

/// @return Square root of x. @ingroup magma_sqrt
static inline double magma_dsqrt( double x ) { return sqrt( x ); }

/// @return Complex square root of x. @ingroup magma_sqrt
magmaFloatComplex    magma_csqrt( magmaFloatComplex  x );

/// @return Complex square root of x. @ingroup magma_sqrt
magmaDoubleComplex   magma_zsqrt( magmaDoubleComplex x );


#ifdef __cplusplus
}
#endif


#endif // MAGMA_AUXILIARY_H
Loading