Loading leonardo_booster.sh +4 −4 Original line number Original line Diff line number Diff line Loading @@ -18,8 +18,8 @@ OMP_GPU=100 #number of particles per GPU thread # # # test tree build on GPU and CPU # test tree build on GPU and CPU # # OMP_NUM_THREADS=4 CC='gcc' HW_FLAGS='-fopenmp -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp OMP_NUM_THREADS=4 CC='gcc' EC_FLAGS='-fopenmp -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp OMP_NUM_THREADS=4 CC='nvc++' HW_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=10000 -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp OMP_NUM_THREADS=4 CC='nvc++' EC_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=10000 -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp for ppl in "${PPL[@]}" ; do # loop over particles per leaf for ppl in "${PPL[@]}" ; do # loop over particles per leaf for n in "${N[@]}" ; do # loop over n of particles for n in "${N[@]}" ; do # loop over n of particles Loading @@ -27,14 +27,14 @@ for ppl in "${PPL[@]}" ; do # loop over particles per leaf for threads in "${THREADS[@]}" ; do #loop over number of OpenMPThreads for threads in "${THREADS[@]}" ; do #loop over number of OpenMPThreads if [[ "$mode" == "cpu" ]]; then if [[ "$mode" == "cpu" ]]; then export CC=gcc export CC=gcc export HW_FLAGS='-O3 -fopenmp' export EC_FLAGS='-O3 -fopenmp' export OMP_NUM_THREADS=$threads export OMP_NUM_THREADS=$threads else else if [[ $threads != 8 ]]; then if [[ $threads != 8 ]]; then continue continue fi fi export CC=nvc++ export CC=nvc++ export HW_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=1000 -O3' export EC_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=1000 -O3' # gpu always uses 32 cpu cores but actually doesnt # gpu always uses 32 cpu cores but actually doesnt export OMP_NUM_THREADS=32 change much export OMP_NUM_THREADS=32 change much threads=$(python -c "print(int($N//100))") #for the text hilight: " threads=$(python -c "print(int($N//100))") #for the text hilight: " Loading Loading
leonardo_booster.sh +4 −4 Original line number Original line Diff line number Diff line Loading @@ -18,8 +18,8 @@ OMP_GPU=100 #number of particles per GPU thread # # # test tree build on GPU and CPU # test tree build on GPU and CPU # # OMP_NUM_THREADS=4 CC='gcc' HW_FLAGS='-fopenmp -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp OMP_NUM_THREADS=4 CC='gcc' EC_FLAGS='-fopenmp -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp OMP_NUM_THREADS=4 CC='nvc++' HW_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=10000 -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp OMP_NUM_THREADS=4 CC='nvc++' EC_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=10000 -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp for ppl in "${PPL[@]}" ; do # loop over particles per leaf for ppl in "${PPL[@]}" ; do # loop over particles per leaf for n in "${N[@]}" ; do # loop over n of particles for n in "${N[@]}" ; do # loop over n of particles Loading @@ -27,14 +27,14 @@ for ppl in "${PPL[@]}" ; do # loop over particles per leaf for threads in "${THREADS[@]}" ; do #loop over number of OpenMPThreads for threads in "${THREADS[@]}" ; do #loop over number of OpenMPThreads if [[ "$mode" == "cpu" ]]; then if [[ "$mode" == "cpu" ]]; then export CC=gcc export CC=gcc export HW_FLAGS='-O3 -fopenmp' export EC_FLAGS='-O3 -fopenmp' export OMP_NUM_THREADS=$threads export OMP_NUM_THREADS=$threads else else if [[ $threads != 8 ]]; then if [[ $threads != 8 ]]; then continue continue fi fi export CC=nvc++ export CC=nvc++ export HW_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=1000 -O3' export EC_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=1000 -O3' # gpu always uses 32 cpu cores but actually doesnt # gpu always uses 32 cpu cores but actually doesnt export OMP_NUM_THREADS=32 change much export OMP_NUM_THREADS=32 change much threads=$(python -c "print(int($N//100))") #for the text hilight: " threads=$(python -c "print(int($N//100))") #for the text hilight: " Loading