Commit d871fe90 authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Merge branch 'parallel_angles_gmu' into 'master'

Implement parallel directional routines

See merge request giacomo.mulas/np_tmcode!52
parents 244cbc04 ca5e3b80
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
LDADD=libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${MAGMALDFLAGS}
lib_LTLIBRARIES=libnptm/libnptm.la
libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp ../src/libnptm/types.cpp
libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp
bin_PROGRAMS=cluster/edfb_clu cluster/clu cluster/np_cluster sphere/edfb_sph sphere/sph sphere/np_sphere trapping/frfme trapping/lffft trapping/np_trapping testing/test_TEDF testing/test_TTMS
cluster_edfb_clu_SOURCES=../src/cluster/edfb_clu.f
cluster_clu_SOURCES=../src/cluster/clu.f
+3 −9
Original line number Diff line number Diff line
@@ -145,8 +145,7 @@ am_libnptm_libnptm_la_OBJECTS = ../src/libnptm/algebraic.lo \
	../src/libnptm/lapack_calls.lo ../src/libnptm/logging.lo \
	../src/libnptm/magma_calls.lo ../src/libnptm/Parsers.lo \
	../src/libnptm/sph_subs.lo ../src/libnptm/tfrfme.lo \
	../src/libnptm/TransitionMatrix.lo ../src/libnptm/tra_subs.lo \
	../src/libnptm/types.lo
	../src/libnptm/TransitionMatrix.lo ../src/libnptm/tra_subs.lo
libnptm_libnptm_la_OBJECTS = $(am_libnptm_libnptm_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -254,7 +253,6 @@ am__depfiles_remade = ../src/cluster/$(DEPDIR)/cluster.Po \
	../src/libnptm/$(DEPDIR)/sph_subs.Plo \
	../src/libnptm/$(DEPDIR)/tfrfme.Plo \
	../src/libnptm/$(DEPDIR)/tra_subs.Plo \
	../src/libnptm/$(DEPDIR)/types.Plo \
	../src/sphere/$(DEPDIR)/np_sphere.Po \
	../src/sphere/$(DEPDIR)/sphere.Po \
	../src/testing/$(DEPDIR)/test_TEDF.Po \
@@ -445,6 +443,7 @@ NMEDIT = @NMEDIT@
NVTXFLAGS = @NVTXFLAGS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OFFLOADFLAGS = @OFFLOADFLAGS@
OMPFLAGS = @OMPFLAGS@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
@@ -521,7 +520,7 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
LDADD = libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${MAGMALDFLAGS}
lib_LTLIBRARIES = libnptm/libnptm.la
libnptm_libnptm_la_SOURCES = ../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp ../src/libnptm/types.cpp
libnptm_libnptm_la_SOURCES = ../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp
cluster_edfb_clu_SOURCES = ../src/cluster/edfb_clu.f
cluster_clu_SOURCES = ../src/cluster/clu.f
cluster_np_cluster_SOURCES = ../src/cluster/np_cluster.cpp ../src/cluster/cluster.cpp
@@ -686,8 +685,6 @@ clean-libLTLIBRARIES:
	../src/libnptm/$(DEPDIR)/$(am__dirstamp)
../src/libnptm/tra_subs.lo: ../src/libnptm/$(am__dirstamp) \
	../src/libnptm/$(DEPDIR)/$(am__dirstamp)
../src/libnptm/types.lo: ../src/libnptm/$(am__dirstamp) \
	../src/libnptm/$(DEPDIR)/$(am__dirstamp)
libnptm/$(am__dirstamp):
	@$(MKDIR_P) libnptm
	@: > libnptm/$(am__dirstamp)
@@ -833,7 +830,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/sph_subs.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/tfrfme.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/tra_subs.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/types.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@../src/sphere/$(DEPDIR)/np_sphere.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@../src/sphere/$(DEPDIR)/sphere.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@../src/testing/$(DEPDIR)/test_TEDF.Po@am__quote@ # am--include-marker
@@ -1202,7 +1198,6 @@ distclean: distclean-am
	-rm -f ../src/libnptm/$(DEPDIR)/sph_subs.Plo
	-rm -f ../src/libnptm/$(DEPDIR)/tfrfme.Plo
	-rm -f ../src/libnptm/$(DEPDIR)/tra_subs.Plo
	-rm -f ../src/libnptm/$(DEPDIR)/types.Plo
	-rm -f ../src/sphere/$(DEPDIR)/np_sphere.Po
	-rm -f ../src/sphere/$(DEPDIR)/sphere.Po
	-rm -f ../src/testing/$(DEPDIR)/test_TEDF.Po
@@ -1272,7 +1267,6 @@ maintainer-clean: maintainer-clean-am
	-rm -f ../src/libnptm/$(DEPDIR)/sph_subs.Plo
	-rm -f ../src/libnptm/$(DEPDIR)/tfrfme.Plo
	-rm -f ../src/libnptm/$(DEPDIR)/tra_subs.Plo
	-rm -f ../src/libnptm/$(DEPDIR)/types.Plo
	-rm -f ../src/sphere/$(DEPDIR)/np_sphere.Po
	-rm -f ../src/sphere/$(DEPDIR)/sphere.Po
	-rm -f ../src/testing/$(DEPDIR)/test_TEDF.Po
+114 −1
Original line number Diff line number Diff line
@@ -664,6 +664,7 @@ MAGMAFLAGS
LAPACKLDFLAGS
LAPACKFLAGS
OMPFLAGS
OFFLOADFLAGS
HDF5_LDFLAGS
HDF5_LIB
HDF5_INCLUDE
@@ -810,6 +811,7 @@ with_aix_soname
with_gnu_ld
with_sysroot
enable_libtool_lock
enable_offload
enable_openmp
with_lapack
with_magma
@@ -1472,6 +1474,8 @@ Optional Features:
  --enable-fast-install[=PKGS]
                          optimize for fast installation [default=yes]
  --disable-libtool-lock  avoid locking (might break parallel builds)
  --enable-offload        enable target offloading (requires g++ version >=
                          13) [default=auto]
  --enable-openmp         enable OpneMP multi-threading [default=yes]
  --enable-nvtx           use NVTX profiling [default=no]
@@ -24883,6 +24887,105 @@ esac
fi
# Configure the optional features
# Check whether --enable-offload was given.
if test ${enable_offload+y}
then :
  enableval=$enable_offload;
    if test "x$enableval" != "xno"; then
    cat > np_test_offload.cpp <<EOF
#include <omp.h>
#pragma omp requires unified_shared_memory
#pragma omp begin declare target device_type(any)
void fill_with_ones(int *array) {
#pragma omp target teams distribute parallel for
  for (int i = 0; i < 1000; i++) {
    for (int j = 0; j < 1000; j++) {
      array[(1000 * i) + j] = 1;
    }
  }
}
#pragma omp end declare target
int main(int argc, char** argv) {
  int *numbers = new int[1000000]();
  fill_with_ones(numbers);
  delete[] numbers;
  return 0;
}
EOF
    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1
    export CXX_SUPPORTS_OFFLOAD=$?
    rm np_test_offload.cpp
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      rm np_test_offload.o
    fi
      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
printf "%s\n" "$as_me: Enabling offload." >&6;}
        OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "
      else
        as_fn_error $? "Target offload was requested, but it is not supported!" "$LINENO" 5
      fi
    else
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
printf "%s\n" "$as_me: Disabling offload." >&6;}
      OFFLOADFLAGS=""
    fi
else case e in #(
  e)
    cat > np_test_offload.cpp <<EOF
#include <omp.h>
#pragma omp requires unified_shared_memory
#pragma omp begin declare target device_type(any)
void fill_with_ones(int *array) {
#pragma omp target teams distribute parallel for
  for (int i = 0; i < 1000; i++) {
    for (int j = 0; j < 1000; j++) {
      array[(1000 * i) + j] = 1;
    }
  }
}
#pragma omp end declare target
int main(int argc, char** argv) {
  int *numbers = new int[1000000]();
  fill_with_ones(numbers);
  delete[] numbers;
  return 0;
}
EOF
    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1
    export CXX_SUPPORTS_OFFLOAD=$?
    rm np_test_offload.cpp
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      rm np_test_offload.o
    fi
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5
printf "%s\n" "$as_me: Enabling offload." >&6;}
      OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "
    else
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5
printf "%s\n" "$as_me: Disabling offload." >&6;}
      OFFLOADFLAGS=""
    fi
 ;;
esac
fi
# Check whether --enable-openmp was given.
if test ${enable_openmp+y}
then :
@@ -25175,7 +25278,17 @@ esac
fi
CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
if test "x$OFFLOADFLAGS" != "x"
then :
  OMPFLAGS=""
else case e in #(
  e) OMPFLAGS=$OMPFLAGS
 ;;
esac
fi
CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
SUBDIRS="cluster libnptm sphere testing trapping"
# Generate the output
+68 −1
Original line number Diff line number Diff line
@@ -93,6 +93,39 @@ EOF
    fi
  ]
)

# M4_TEST_OFFLOAD: probe whether $CXX can compile OpenMP target-offload code.
# The macro writes a small C++ program to np_test_offload.cpp, compiles it
# (compile only, no link step) with the offload options for the nvptx-none
# target, and stores the compiler exit status in CXX_SUPPORTS_OFFLOAD, so that
# a value of 0 means the compilation succeeded. The test source and any object
# file produced by the probe are removed before the macro returns.
m4_define(
  [M4_TEST_OFFLOAD],
  [
    cat > np_test_offload.cpp <<EOF
#include <omp.h>
#pragma omp requires unified_shared_memory

#pragma omp begin declare target device_type(any)
void fill_with_ones(int *array) {
#pragma omp target teams distribute parallel for
  for (int i = 0; i < 1000; i++) {
    for (int j = 0; j < 1000; j++) {
      array[[(1000 * i) + j]] = 1;
    }
  }
}
#pragma omp end declare target
int main(int argc, char** argv) {
  int *numbers = new int[[1000000]]();
  fill_with_ones(numbers);
  delete[[]] numbers;
  return 0;
}
EOF
    $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1
    # Record the compiler exit status right away, before another command replaces it.
    # Assignment and export are kept separate because the combined form is not
    # accepted by every Bourne-compatible shell that may run configure.
    CXX_SUPPORTS_OFFLOAD=$?
    export CXX_SUPPORTS_OFFLOAD
    # Forced removal never fails, whether or not the object file was produced.
    rm -f np_test_offload.cpp np_test_offload.o
  ]
)
# END CAPABILITY TESTING MACROS

# autoconf setup initialization
@@ -219,6 +252,35 @@ AS_IF(
)

# Configure the optional features
# Optional feature: OpenMP target offloading.
# With an explicit request the compiler probe must succeed, otherwise configure
# stops with an error. With an explicit "no" the offload flags are left empty.
# When the option is not given at all, the probe decides: offload is enabled if
# the test program compiles and is silently disabled otherwise.
# When enabled, OFFLOADFLAGS carries the offload compiler options together with
# -fopenmp and the USE_TARGET_OFFLOAD preprocessor definition.
AC_ARG_ENABLE(
  [offload],
  [AS_HELP_STRING([--enable-offload], [enable target offloading (requires g++ version >= 13) [default=auto]])],
  [
    # The option was given on the command line, any value other than no requests offload.
    if test "x$enableval" != "xno"; then
      M4_TEST_OFFLOAD
      if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
        AC_MSG_NOTICE([Enabling offload.])
        AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "])
      else
        AC_MSG_ERROR([Target offload was requested, but it is not supported!])
      fi
    else
      AC_MSG_NOTICE([Disabling offload.])
      AC_SUBST([OFFLOADFLAGS], [""])
    fi
  ],
  [
    # The option was not given, so enable offload only if the compiler probe succeeds.
    M4_TEST_OFFLOAD
    if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then
      AC_MSG_NOTICE([Enabling offload.])
      AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "])
    else
      AC_MSG_NOTICE([Disabling offload.])
      AC_SUBST([OFFLOADFLAGS], [""])
    fi
  ]
)

AC_ARG_ENABLE(
  [openmp],
  [AS_HELP_STRING([--enable-openmp], [enable OpneMP multi-threading [default=yes]])],
@@ -346,7 +408,12 @@ AC_ARG_WITH(
  ]
)

CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
# Keep OMPFLAGS unchanged when no offload flags are set, and empty it when
# offloading is enabled (presumably because the offload flags already carry
# the OpenMP switch -- TODO confirm).
AS_IF(
  [test "x$OFFLOADFLAGS" = "x"],
  [AC_SUBST([OMPFLAGS], [$OMPFLAGS])],
  [AC_SUBST([OMPFLAGS], [""])]
)
CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS"
SUBDIRS="cluster libnptm sphere testing trapping"

# Generate the output
+9 −1
Original line number Diff line number Diff line
@@ -306,7 +306,15 @@ void cluster(const string& config_file, const string& data_file, const string& o
#ifdef USE_NVTX
      nvtxRangePush("First iteration");
#endif
      int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
      // Wrap the call in a parallel/single region. This should have no effect on
      // parallelism; it only makes the first wavelength iteration run at the same
      // OpenMP nesting level as the other iterations.
      int jer = 0;
#pragma omp parallel
      {
#pragma omp single
	{
	  jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp);
	}
      }
#ifdef USE_NVTX
      nvtxRangePop();
#endif
Loading