Commit dbc6ef2d authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Test for target offloading capability

parent 3c3b1b3f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -445,6 +445,7 @@ NMEDIT = @NMEDIT@
NVTXFLAGS = @NVTXFLAGS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OFFLOADFLAGS = @OFFLOADFLAGS@
OMPFLAGS = @OMPFLAGS@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
+106 −3
Original line number Diff line number Diff line
@@ -664,6 +664,7 @@ MAGMAFLAGS
LAPACKLDFLAGS
LAPACKFLAGS
OMPFLAGS
OFFLOADFLAGS
HDF5_LDFLAGS
HDF5_LIB
HDF5_INCLUDE
@@ -810,6 +811,7 @@ with_aix_soname
with_gnu_ld
with_sysroot
enable_libtool_lock
enable_offload
enable_openmp
with_lapack
with_magma
@@ -1472,6 +1474,8 @@ Optional Features:
  --enable-fast-install[=PKGS]
                          optimize for fast installation [default=yes]
  --disable-libtool-lock  avoid locking (might break parallel builds)
  --enable-offload        enable target offloading (requires g++ version >=
                          13) [default=auto]
  --enable-openmp         enable OpenMP multi-threading [default=yes]
  --enable-nvtx           use NVTX profiling [default=no]
@@ -24883,18 +24887,117 @@ esac
fi
# Configure the optional features
# Check whether --enable-offload was given.
# If the option was given with a value other than "no", a compile probe must
# succeed or configure aborts. If the option was not given at all, the same
# probe is run and its result decides whether OFFLOADFLAGS is set or left empty.
if test ${enable_offload+y}
then :
  enableval=$enable_offload;
    if test "x$enableval" != "xno"; then
    # Write a small OpenMP target-offload test program to the current directory.
    cat > np_test_offload.cpp <<EOF
#include <omp.h>
#pragma omp requires unified_shared_memory
#pragma omp begin declare target device_type(any)
void fill_with_ones(int *array) {
#pragma omp target teams distribute parallel for
  for (int i = 0; i < 1000; i++) {
    for (int j = 0; j < 1000; j++) {
      array[(1000 * i) + j] = 1;
    }
  }
}
#pragma omp end declare target
int main(int argc, char** argv) {
  int *numbers = new int[1000000]();
  fill_with_ones(numbers);
  delete[] numbers;
  return 0;
}
EOF
    # Compile only (-c). stdout is discarded; the stderr redirect is commented
    # out, so compiler diagnostics are still printed.
    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null #2>&1
    # Keep the compiler exit status: 0 means the probe compiled.
    export CXX_SUPPORTS_OFFLOAD=$?
    rm np_test_offload.cpp
    # The object file is only removed when the compilation succeeded.
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      rm np_test_offload.o
    fi
      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
        # Report on descriptors 5 and 6 and record the offload compiler flags.
        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
printf "%s\n" "$as_me: Enabling offload." >&6;}
        OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"
      else
        # Offload was explicitly requested but the probe failed: abort.
        as_fn_error $? "Target offload was requested, but it is not supported!" "$LINENO" 5
      fi
    else
      # --enable-offload=no / --disable-offload: no probe, empty flags.
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
printf "%s\n" "$as_me: Disabling offload." >&6;}
      OFFLOADFLAGS=""
    fi
else case e in #(
  e)
    # Option not given: run the same probe, but a failure only disables offload.
    cat > np_test_offload.cpp <<EOF
#include <omp.h>
#pragma omp requires unified_shared_memory
#pragma omp begin declare target device_type(any)
void fill_with_ones(int *array) {
#pragma omp target teams distribute parallel for
  for (int i = 0; i < 1000; i++) {
    for (int j = 0; j < 1000; j++) {
      array[(1000 * i) + j] = 1;
    }
  }
}
#pragma omp end declare target
int main(int argc, char** argv) {
  int *numbers = new int[1000000]();
  fill_with_ones(numbers);
  delete[] numbers;
  return 0;
}
EOF
    # Compile only (-c). stdout is discarded; the stderr redirect is commented
    # out, so compiler diagnostics are still printed.
    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null #2>&1
    # Keep the compiler exit status: 0 means the probe compiled.
    export CXX_SUPPORTS_OFFLOAD=$?
    rm np_test_offload.cpp
    # The object file is only removed when the compilation succeeded.
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      rm np_test_offload.o
    fi
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
printf "%s\n" "$as_me: Enabling offload." >&6;}
      OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"
    else
      # Unlike the explicit-request branch, a failed probe is not an error here.
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
printf "%s\n" "$as_me: Disabling offload." >&6;}
      OFFLOADFLAGS=""
    fi
 ;;
esac
fi
# Check whether --enable-openmp was given.
if test ${enable_openmp+y}
then :
  enableval=$enable_openmp;
    if test "x$enableval" != "xno"; then
      OMPFLAGS="-fopenmp"
      OMPFLAGS="-DUSE_OPENMP -fopenmp"
    fi
else case e in #(
  e)
    OMPFLAGS="-fopenmp"
    OMPFLAGS="-DUSE_OPENMP -fopenmp"
 ;;
@@ -25175,7 +25278,7 @@ esac
fi
# Assemble the final compiler flags. Of the two assignments below only the
# later one takes effect; it differs by inserting $OFFLOADFLAGS after -ggdb.
CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
SUBDIRS="cluster libnptm sphere testing trapping"
# Generate the output
+65 −3
Original line number Diff line number Diff line
@@ -93,6 +93,39 @@ EOF
    fi
  ]
)

m4_define(
  [M4_TEST_OFFLOAD],
  [
    # Probe whether the C++ compiler can build an OpenMP target-offload program.
    # The compiler exit status is left in CXX_SUPPORTS_OFFLOAD and is 0 on success.
    cat > np_test_offload.cpp <<EOF
#include <omp.h>
#pragma omp requires unified_shared_memory

#pragma omp begin declare target device_type(any)
void fill_with_ones(int *array) {
#pragma omp target teams distribute parallel for
  for (int i = 0; i < 1000; i++) {
    for (int j = 0; j < 1000; j++) {
      array[[(1000 * i) + j]] = 1;
    }
  }
}
#pragma omp end declare target
int main(int argc, char** argv) {
  int *numbers = new int[[1000000]]();
  fill_with_ones(numbers);
  delete[[]] numbers;
  return 0;
}
EOF
    # Send all compiler output to the configure log so that a failed probe
    # does not clutter the terminal while its diagnostics stay available.
    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp >&AS_MESSAGE_LOG_FD 2>&1
    # Assign first and export separately: combining both in one command is
    # not portable to every shell that may run configure.
    CXX_SUPPORTS_OFFLOAD=$?
    export CXX_SUPPORTS_OFFLOAD
    # Remove the probe source and any object file that was produced.
    rm -f np_test_offload.cpp np_test_offload.o
  ]
)
# END CAPABILITY TESTING MACROS

# autoconf setup initialization
@@ -219,16 +252,45 @@ AS_IF(
)

# Configure the optional features
# Optional feature: target offloading.
# When the option is given with a value other than "no", the compile probe
# must succeed or configure stops with an error. When the option is omitted,
# the probe result alone decides whether the offload flags are set.
AC_ARG_ENABLE(
  [offload],
  [AS_HELP_STRING([--enable-offload], [enable target offloading (requires g++ version >= 13) [default=auto]])],
  [
    # Option was given on the command line.
    if test "x$enableval" != "xno"; then
      M4_TEST_OFFLOAD
      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
        AC_MSG_NOTICE([Enabling offload.])
        AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"])
      else
        # An explicit request that cannot be honoured is fatal.
        AC_MSG_ERROR([Target offload was requested, but it is not supported!])
      fi
    else
      # Explicitly disabled: skip the probe and leave the flags empty.
      AC_MSG_NOTICE([Disabling offload.])
      AC_SUBST([OFFLOADFLAGS], [""])
    fi
  ],
  [
    # Option was not given: probe and fall back to no offload on failure.
    M4_TEST_OFFLOAD
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      AC_MSG_NOTICE([Enabling offload.])
      AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp"])
    else
      AC_MSG_NOTICE([Disabling offload.])
      AC_SUBST([OFFLOADFLAGS], [""])
    fi
  ]
)

# Optional feature: OpenMP multi-threading.
# OMPFLAGS is set unless the option is given with the value "no"; in that
# case it is left untouched.
AC_ARG_ENABLE(
  [openmp],
  [AS_HELP_STRING([--enable-openmp], [enable OpenMP multi-threading [default=yes]])],
  [
    # Option was given: any value other than "no" turns OpenMP on.
    if test "x$enableval" != "xno"; then
      AC_SUBST([OMPFLAGS], ["-DUSE_OPENMP -fopenmp"])
    fi
  ],
  [
    # Option was not given: OpenMP is enabled by default.
    AC_SUBST([OMPFLAGS], ["-DUSE_OPENMP -fopenmp"])
  ]
)

@@ -346,7 +408,7 @@ AC_ARG_WITH(
  ]
)

# Assemble the final compiler flags. Of the two assignments below only the
# later one takes effect; it differs by inserting $OFFLOADFLAGS after -ggdb.
CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
SUBDIRS="cluster libnptm sphere testing trapping"

# Generate the output