Loading build/Makefile.am +1 −1 Original line number Diff line number Diff line LDADD=libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${MAGMALDFLAGS} lib_LTLIBRARIES=libnptm/libnptm.la libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp ../src/libnptm/types.cpp libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp bin_PROGRAMS=cluster/edfb_clu cluster/clu cluster/np_cluster sphere/edfb_sph sphere/sph sphere/np_sphere trapping/frfme trapping/lffft trapping/np_trapping testing/test_TEDF testing/test_TTMS cluster_edfb_clu_SOURCES=../src/cluster/edfb_clu.f cluster_clu_SOURCES=../src/cluster/clu.f Loading build/Makefile.in +3 −9 Original line number Diff line number Diff line Loading @@ -145,8 +145,7 @@ am_libnptm_libnptm_la_OBJECTS = ../src/libnptm/algebraic.lo \ ../src/libnptm/lapack_calls.lo ../src/libnptm/logging.lo \ ../src/libnptm/magma_calls.lo ../src/libnptm/Parsers.lo \ ../src/libnptm/sph_subs.lo ../src/libnptm/tfrfme.lo \ ../src/libnptm/TransitionMatrix.lo ../src/libnptm/tra_subs.lo \ ../src/libnptm/types.lo ../src/libnptm/TransitionMatrix.lo ../src/libnptm/tra_subs.lo libnptm_libnptm_la_OBJECTS = $(am_libnptm_libnptm_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) Loading Loading @@ -254,7 +253,6 @@ am__depfiles_remade = ../src/cluster/$(DEPDIR)/cluster.Po \ ../src/libnptm/$(DEPDIR)/sph_subs.Plo \ ../src/libnptm/$(DEPDIR)/tfrfme.Plo \ ../src/libnptm/$(DEPDIR)/tra_subs.Plo \ ../src/libnptm/$(DEPDIR)/types.Plo \ ../src/sphere/$(DEPDIR)/np_sphere.Po \ ../src/sphere/$(DEPDIR)/sphere.Po \ ../src/testing/$(DEPDIR)/test_TEDF.Po \ Loading Loading @@ -445,6 +443,7 @@ NMEDIT = @NMEDIT@ NVTXFLAGS = @NVTXFLAGS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OFFLOADFLAGS = @OFFLOADFLAGS@ OMPFLAGS = @OMPFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ Loading Loading @@ -521,7 +520,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LDADD = libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${MAGMALDFLAGS} lib_LTLIBRARIES = libnptm/libnptm.la libnptm_libnptm_la_SOURCES = ../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp ../src/libnptm/types.cpp libnptm_libnptm_la_SOURCES = ../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp cluster_edfb_clu_SOURCES = ../src/cluster/edfb_clu.f cluster_clu_SOURCES = ../src/cluster/clu.f cluster_np_cluster_SOURCES = ../src/cluster/np_cluster.cpp ../src/cluster/cluster.cpp Loading Loading @@ -686,8 +685,6 @@ clean-libLTLIBRARIES: ../src/libnptm/$(DEPDIR)/$(am__dirstamp) ../src/libnptm/tra_subs.lo: ../src/libnptm/$(am__dirstamp) \ ../src/libnptm/$(DEPDIR)/$(am__dirstamp) ../src/libnptm/types.lo: ../src/libnptm/$(am__dirstamp) \ ../src/libnptm/$(DEPDIR)/$(am__dirstamp) libnptm/$(am__dirstamp): @$(MKDIR_P) libnptm @: > libnptm/$(am__dirstamp) Loading Loading @@ -833,7 +830,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/sph_subs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/tfrfme.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/tra_subs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/types.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/sphere/$(DEPDIR)/np_sphere.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/sphere/$(DEPDIR)/sphere.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/testing/$(DEPDIR)/test_TEDF.Po@am__quote@ # am--include-marker Loading Loading @@ -1202,7 +1198,6 @@ distclean: distclean-am -rm -f ../src/libnptm/$(DEPDIR)/sph_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/tfrfme.Plo -rm -f ../src/libnptm/$(DEPDIR)/tra_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/types.Plo -rm -f ../src/sphere/$(DEPDIR)/np_sphere.Po -rm -f ../src/sphere/$(DEPDIR)/sphere.Po -rm -f ../src/testing/$(DEPDIR)/test_TEDF.Po Loading Loading @@ -1272,7 +1267,6 @@ maintainer-clean: maintainer-clean-am -rm -f ../src/libnptm/$(DEPDIR)/sph_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/tfrfme.Plo -rm -f ../src/libnptm/$(DEPDIR)/tra_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/types.Plo -rm -f ../src/sphere/$(DEPDIR)/np_sphere.Po -rm -f ../src/sphere/$(DEPDIR)/sphere.Po -rm -f ../src/testing/$(DEPDIR)/test_TEDF.Po Loading build/configure +114 −1 Original line number Diff line number Diff line Loading @@ -664,6 +664,7 @@ MAGMAFLAGS LAPACKLDFLAGS LAPACKFLAGS OMPFLAGS OFFLOADFLAGS HDF5_LDFLAGS HDF5_LIB HDF5_INCLUDE Loading Loading @@ -810,6 +811,7 @@ with_aix_soname with_gnu_ld with_sysroot enable_libtool_lock enable_offload enable_openmp with_lapack with_magma Loading Loading @@ -1472,6 +1474,8 @@ Optional Features: --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) --enable-offload enable target offloading (requires g++ version >= 13) [default=auto] --enable-openmp enable OpneMP multi-threading [default=yes] --enable-nvtx use NVTX profiling [default=no] Loading Loading @@ -24883,6 +24887,105 @@ esac fi # Configure the optional features # Check whether --enable-offload was given. if test ${enable_offload+y} then : enableval=$enable_offload; if test "x$enableval" != "xno"; then cat > np_test_offload.cpp <<EOF #include <omp.h> #pragma omp requires unified_shared_memory #pragma omp begin declare target device_type(any) void fill_with_ones(int *array) { #pragma omp target teams distribute parallel for for (int i = 0; i < 1000; i++) { for (int j = 0; j < 1000; j++) { array[(1000 * i) + j] = 1; } } } #pragma omp end declare target int main(int argc, char** argv) { int *numbers = new int[1000000](); fill_with_ones(numbers); delete[] numbers; return 0; } EOF $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1 export CXX_SUPPORTS_OFFLOAD=$? rm np_test_offload.cpp if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then rm np_test_offload.o fi if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5 printf "%s\n" "$as_me: Enabling offload." >&6;} OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD " else as_fn_error $? "Target offload was requested, but it is not supported!" "$LINENO" 5 fi else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5 printf "%s\n" "$as_me: Disabling offload." >&6;} OFFLOADFLAGS="" fi else case e in #( e) cat > np_test_offload.cpp <<EOF #include <omp.h> #pragma omp requires unified_shared_memory #pragma omp begin declare target device_type(any) void fill_with_ones(int *array) { #pragma omp target teams distribute parallel for for (int i = 0; i < 1000; i++) { for (int j = 0; j < 1000; j++) { array[(1000 * i) + j] = 1; } } } #pragma omp end declare target int main(int argc, char** argv) { int *numbers = new int[1000000](); fill_with_ones(numbers); delete[] numbers; return 0; } EOF $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1 export CXX_SUPPORTS_OFFLOAD=$? rm np_test_offload.cpp if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then rm np_test_offload.o fi if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5 printf "%s\n" "$as_me: Enabling offload." >&6;} OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD " else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5 printf "%s\n" "$as_me: Disabling offload." >&6;} OFFLOADFLAGS="" fi ;; esac fi # Check whether --enable-openmp was given. if test ${enable_openmp+y} then : Loading Loading @@ -25175,7 +25278,17 @@ esac fi CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" if test "x$OFFLOADFLAGS" != "x" then : OMPFLAGS="" else case e in #( e) OMPFLAGS=$OMPFLAGS ;; esac fi CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" SUBDIRS="cluster libnptm sphere testing trapping" # Generate the output build/configure.ac +68 −1 Original line number Diff line number Diff line Loading @@ -93,6 +93,39 @@ EOF fi ] ) m4_define( [M4_TEST_OFFLOAD], [ cat > np_test_offload.cpp <<EOF #include <omp.h> #pragma omp requires unified_shared_memory #pragma omp begin declare target device_type(any) void fill_with_ones(int *array) { #pragma omp target teams distribute parallel for for (int i = 0; i < 1000; i++) { for (int j = 0; j < 1000; j++) { array[[(1000 * i) + j]] = 1; } } } #pragma omp end declare target int main(int argc, char** argv) { int *numbers = new int[[1000000]](); fill_with_ones(numbers); delete[[]] numbers; return 0; } EOF $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1 export CXX_SUPPORTS_OFFLOAD=$? rm np_test_offload.cpp if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then rm np_test_offload.o fi ] ) # END CAPABILITY TESTING MACROS # autoconf setup initialization Loading Loading @@ -219,6 +252,35 @@ AS_IF( ) # Configure the optional features AC_ARG_ENABLE( [offload], [AS_HELP_STRING([--enable-offload], [enable target offloading (requires g++ version >= 13) [default=auto]])], [ if test "x$enableval" != "xno"; then M4_TEST_OFFLOAD if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then AC_MSG_NOTICE([Enabling offload.]) AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "]) else AC_MSG_ERROR([Target offload was requested, but it is not supported!]) fi else AC_MSG_NOTICE([Disabling offload.]) AC_SUBST([OFFLOADFLAGS], [""]) fi ], [ M4_TEST_OFFLOAD if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then AC_MSG_NOTICE([Enabling offload.]) AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "]) else AC_MSG_NOTICE([Disabling offload.]) AC_SUBST([OFFLOADFLAGS], [""]) fi ] ) AC_ARG_ENABLE( [openmp], [AS_HELP_STRING([--enable-openmp], [enable OpneMP multi-threading [default=yes]])], Loading Loading @@ -346,7 +408,12 @@ AC_ARG_WITH( ] ) CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" AS_IF( [test "x$OFFLOADFLAGS" != "x"], [AC_SUBST([OMPFLAGS], [""])], [AC_SUBST([OMPFLAGS], [$OMPFLAGS])] ) CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" SUBDIRS="cluster libnptm sphere testing trapping" # Generate the output Loading src/cluster/cluster.cpp +9 −1 Original line number Diff line number Diff line Loading @@ -306,7 +306,15 @@ void cluster(const string& config_file, const string& data_file, const string& o #ifdef USE_NVTX nvtxRangePush("First iteration"); #endif int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp); // use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration int jer = 0; #pragma omp parallel { #pragma omp single { jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp); } } #ifdef USE_NVTX nvtxRangePop(); #endif Loading Loading
build/Makefile.am +1 −1 Original line number Diff line number Diff line LDADD=libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${MAGMALDFLAGS} lib_LTLIBRARIES=libnptm/libnptm.la libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp ../src/libnptm/types.cpp libnptm_libnptm_la_SOURCES=../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp bin_PROGRAMS=cluster/edfb_clu cluster/clu cluster/np_cluster sphere/edfb_sph sphere/sph sphere/np_sphere trapping/frfme trapping/lffft trapping/np_trapping testing/test_TEDF testing/test_TTMS cluster_edfb_clu_SOURCES=../src/cluster/edfb_clu.f cluster_clu_SOURCES=../src/cluster/clu.f Loading
build/Makefile.in +3 −9 Original line number Diff line number Diff line Loading @@ -145,8 +145,7 @@ am_libnptm_libnptm_la_OBJECTS = ../src/libnptm/algebraic.lo \ ../src/libnptm/lapack_calls.lo ../src/libnptm/logging.lo \ ../src/libnptm/magma_calls.lo ../src/libnptm/Parsers.lo \ ../src/libnptm/sph_subs.lo ../src/libnptm/tfrfme.lo \ ../src/libnptm/TransitionMatrix.lo ../src/libnptm/tra_subs.lo \ ../src/libnptm/types.lo ../src/libnptm/TransitionMatrix.lo ../src/libnptm/tra_subs.lo libnptm_libnptm_la_OBJECTS = $(am_libnptm_libnptm_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) Loading Loading @@ -254,7 +253,6 @@ am__depfiles_remade = ../src/cluster/$(DEPDIR)/cluster.Po \ ../src/libnptm/$(DEPDIR)/sph_subs.Plo \ ../src/libnptm/$(DEPDIR)/tfrfme.Plo \ ../src/libnptm/$(DEPDIR)/tra_subs.Plo \ ../src/libnptm/$(DEPDIR)/types.Plo \ ../src/sphere/$(DEPDIR)/np_sphere.Po \ ../src/sphere/$(DEPDIR)/sphere.Po \ ../src/testing/$(DEPDIR)/test_TEDF.Po \ Loading Loading @@ -445,6 +443,7 @@ NMEDIT = @NMEDIT@ NVTXFLAGS = @NVTXFLAGS@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OFFLOADFLAGS = @OFFLOADFLAGS@ OMPFLAGS = @OMPFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ Loading Loading @@ -521,7 +520,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LDADD = libnptm/libnptm.la -L/usr/lib64 ${USER_LDFLAGS} ${HDF5_LDFLAGS} ${LAPACKLDFLAGS} ${MAGMALDFLAGS} lib_LTLIBRARIES = libnptm/libnptm.la libnptm_libnptm_la_SOURCES = ../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp ../src/libnptm/types.cpp libnptm_libnptm_la_SOURCES = ../src/libnptm/algebraic.cpp ../src/libnptm/clu_subs.cpp ../src/libnptm/Commons.cpp ../src/libnptm/Configuration.cpp ../src/libnptm/file_io.cpp ../src/libnptm/lapack_calls.cpp ../src/libnptm/logging.cpp ../src/libnptm/magma_calls.cpp ../src/libnptm/Parsers.cpp ../src/libnptm/sph_subs.cpp ../src/libnptm/tfrfme.cpp ../src/libnptm/TransitionMatrix.cpp ../src/libnptm/tra_subs.cpp cluster_edfb_clu_SOURCES = ../src/cluster/edfb_clu.f cluster_clu_SOURCES = ../src/cluster/clu.f cluster_np_cluster_SOURCES = ../src/cluster/np_cluster.cpp ../src/cluster/cluster.cpp Loading Loading @@ -686,8 +685,6 @@ clean-libLTLIBRARIES: ../src/libnptm/$(DEPDIR)/$(am__dirstamp) ../src/libnptm/tra_subs.lo: ../src/libnptm/$(am__dirstamp) \ ../src/libnptm/$(DEPDIR)/$(am__dirstamp) ../src/libnptm/types.lo: ../src/libnptm/$(am__dirstamp) \ ../src/libnptm/$(DEPDIR)/$(am__dirstamp) libnptm/$(am__dirstamp): @$(MKDIR_P) libnptm @: > libnptm/$(am__dirstamp) Loading Loading @@ -833,7 +830,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/sph_subs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/tfrfme.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/tra_subs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/libnptm/$(DEPDIR)/types.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/sphere/$(DEPDIR)/np_sphere.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/sphere/$(DEPDIR)/sphere.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../src/testing/$(DEPDIR)/test_TEDF.Po@am__quote@ # am--include-marker Loading Loading @@ -1202,7 +1198,6 @@ distclean: distclean-am -rm -f ../src/libnptm/$(DEPDIR)/sph_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/tfrfme.Plo -rm -f ../src/libnptm/$(DEPDIR)/tra_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/types.Plo -rm -f ../src/sphere/$(DEPDIR)/np_sphere.Po -rm -f ../src/sphere/$(DEPDIR)/sphere.Po -rm -f ../src/testing/$(DEPDIR)/test_TEDF.Po Loading Loading @@ -1272,7 +1267,6 @@ maintainer-clean: maintainer-clean-am -rm -f ../src/libnptm/$(DEPDIR)/sph_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/tfrfme.Plo -rm -f ../src/libnptm/$(DEPDIR)/tra_subs.Plo -rm -f ../src/libnptm/$(DEPDIR)/types.Plo -rm -f ../src/sphere/$(DEPDIR)/np_sphere.Po -rm -f ../src/sphere/$(DEPDIR)/sphere.Po -rm -f ../src/testing/$(DEPDIR)/test_TEDF.Po Loading
build/configure +114 −1 Original line number Diff line number Diff line Loading @@ -664,6 +664,7 @@ MAGMAFLAGS LAPACKLDFLAGS LAPACKFLAGS OMPFLAGS OFFLOADFLAGS HDF5_LDFLAGS HDF5_LIB HDF5_INCLUDE Loading Loading @@ -810,6 +811,7 @@ with_aix_soname with_gnu_ld with_sysroot enable_libtool_lock enable_offload enable_openmp with_lapack with_magma Loading Loading @@ -1472,6 +1474,8 @@ Optional Features: --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) --enable-offload enable target offloading (requires g++ version >= 13) [default=auto] --enable-openmp enable OpneMP multi-threading [default=yes] --enable-nvtx use NVTX profiling [default=no] Loading Loading @@ -24883,6 +24887,105 @@ esac fi # Configure the optional features # Check whether --enable-offload was given. if test ${enable_offload+y} then : enableval=$enable_offload; if test "x$enableval" != "xno"; then cat > np_test_offload.cpp <<EOF #include <omp.h> #pragma omp requires unified_shared_memory #pragma omp begin declare target device_type(any) void fill_with_ones(int *array) { #pragma omp target teams distribute parallel for for (int i = 0; i < 1000; i++) { for (int j = 0; j < 1000; j++) { array[(1000 * i) + j] = 1; } } } #pragma omp end declare target int main(int argc, char** argv) { int *numbers = new int[1000000](); fill_with_ones(numbers); delete[] numbers; return 0; } EOF $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1 export CXX_SUPPORTS_OFFLOAD=$? rm np_test_offload.cpp if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then rm np_test_offload.o fi if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5 printf "%s\n" "$as_me: Enabling offload." >&6;} OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD " else as_fn_error $? "Target offload was requested, but it is not supported!" "$LINENO" 5 fi else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5 printf "%s\n" "$as_me: Disabling offload." >&6;} OFFLOADFLAGS="" fi else case e in #( e) cat > np_test_offload.cpp <<EOF #include <omp.h> #pragma omp requires unified_shared_memory #pragma omp begin declare target device_type(any) void fill_with_ones(int *array) { #pragma omp target teams distribute parallel for for (int i = 0; i < 1000; i++) { for (int j = 0; j < 1000; j++) { array[(1000 * i) + j] = 1; } } } #pragma omp end declare target int main(int argc, char** argv) { int *numbers = new int[1000000](); fill_with_ones(numbers); delete[] numbers; return 0; } EOF $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1 export CXX_SUPPORTS_OFFLOAD=$? rm np_test_offload.cpp if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then rm np_test_offload.o fi if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Enabling offload." >&5 printf "%s\n" "$as_me: Enabling offload." >&6;} OFFLOADFLAGS="-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD " else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Disabling offload." >&5 printf "%s\n" "$as_me: Disabling offload." >&6;} OFFLOADFLAGS="" fi ;; esac fi # Check whether --enable-openmp was given. if test ${enable_openmp+y} then : Loading Loading @@ -25175,7 +25278,17 @@ esac fi CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" if test "x$OFFLOADFLAGS" != "x" then : OMPFLAGS="" else case e in #( e) OMPFLAGS=$OMPFLAGS ;; esac fi CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" SUBDIRS="cluster libnptm sphere testing trapping" # Generate the output
build/configure.ac +68 −1 Original line number Diff line number Diff line Loading @@ -93,6 +93,39 @@ EOF fi ] ) m4_define( [M4_TEST_OFFLOAD], [ cat > np_test_offload.cpp <<EOF #include <omp.h> #pragma omp requires unified_shared_memory #pragma omp begin declare target device_type(any) void fill_with_ones(int *array) { #pragma omp target teams distribute parallel for for (int i = 0; i < 1000; i++) { for (int j = 0; j < 1000; j++) { array[[(1000 * i) + j]] = 1; } } } #pragma omp end declare target int main(int argc, char** argv) { int *numbers = new int[[1000000]](); fill_with_ones(numbers); delete[[]] numbers; return 0; } EOF $CXX -fcf-protection=check -foffload=default -foffload=nvptx-none="-O3 -ggdb -fopt-info -lm -latomic -mgomp" -fopenmp -c np_test_offload.cpp > /dev/null 2>&1 export CXX_SUPPORTS_OFFLOAD=$? rm np_test_offload.cpp if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then rm np_test_offload.o fi ] ) # END CAPABILITY TESTING MACROS # autoconf setup initialization Loading Loading @@ -219,6 +252,35 @@ AS_IF( ) # Configure the optional features AC_ARG_ENABLE( [offload], [AS_HELP_STRING([--enable-offload], [enable target offloading (requires g++ version >= 13) [default=auto]])], [ if test "x$enableval" != "xno"; then M4_TEST_OFFLOAD if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then AC_MSG_NOTICE([Enabling offload.]) AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "]) else AC_MSG_ERROR([Target offload was requested, but it is not supported!]) fi else AC_MSG_NOTICE([Disabling offload.]) AC_SUBST([OFFLOADFLAGS], [""]) fi ], [ M4_TEST_OFFLOAD if test "x$CXX_SUPPORTS_OFFLOAD" = "x0"; then AC_MSG_NOTICE([Enabling offload.]) AC_SUBST([OFFLOADFLAGS], ["-fcf-protection=check -foffload=default -foffload=nvptx-none=\"-O3 -ggdb -fopt-info -lm -latomic -mgomp\" -fopenmp -DUSE_TARGET_OFFLOAD "]) else AC_MSG_NOTICE([Disabling offload.]) AC_SUBST([OFFLOADFLAGS], [""]) fi ] ) AC_ARG_ENABLE( [openmp], [AS_HELP_STRING([--enable-openmp], [enable OpneMP multi-threading [default=yes]])], Loading Loading @@ -346,7 +408,12 @@ AC_ARG_WITH( ] ) CXXFLAGS="$CLANGFLAGS -O3 -ggdb $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" AS_IF( [test "x$OFFLOADFLAGS" != "x"], [AC_SUBST([OMPFLAGS], [""])], [AC_SUBST([OMPFLAGS], [$OMPFLAGS])] ) CXXFLAGS="$CLANGFLAGS -O3 -ggdb $OFFLOADFLAGS $USER_INCLUDE -I$HDF5_INCLUDE $OMPFLAGS $MPIFLAGS $LAPACKFLAGS $MAGMAFLAGS $NVTXFLAGS" SUBDIRS="cluster libnptm sphere testing trapping" # Generate the output Loading
src/cluster/cluster.cpp +9 −1 Original line number Diff line number Diff line Loading @@ -306,7 +306,15 @@ void cluster(const string& config_file, const string& data_file, const string& o #ifdef USE_NVTX nvtxRangePush("First iteration"); #endif int jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp); // use these pragmas, which should have no effect on parallelism, just to push OMP nested levels at the same level also in the first wavelength iteration int jer = 0; #pragma omp parallel { #pragma omp single { jer = cluster_jxi488_cycle(jxi488, sconf, gconf, p_scattering_angles, cid, p_output, output_path, vtppoanp); } } #ifdef USE_NVTX nvtxRangePop(); #endif Loading