Edit leonardo_booster.sh (df9680e5) · Commits · hotwheels / gitlab-profile

leonardo_booster.sh

+4 −4

Original line number	Diff line number	Diff line
		@@ -18,8 +18,8 @@ OMP_GPU=100 #number of particles per GPU thread
		#
		# test tree build on GPU and CPU
		#
		# Each command below launches one srun task (-n 1) that runs the Python module
		# hotwheels_core.wrap on src/test_target.cpp, with OMP_NUM_THREADS=4 and the
		# compiler (CC) plus its flags supplied as per-command environment variables:
		#   - CC='gcc'   with '-fopenmp -O3'
		#   - CC='nvc++' with '-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=10000 -O3'
		# NOTE(review): the gcc/nvc++ pair appears twice, once passing the flags as
		# HW_FLAGS and once as EC_FLAGS. In this diff view that looks like the removed
		# and added sides of a rename (HW_FLAGS -> EC_FLAGS), not four separate runs --
		# confirm against the real file before relying on either variable name.
		OMP_NUM_THREADS=4 CC='gcc' HW_FLAGS='-fopenmp -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp
		OMP_NUM_THREADS=4 CC='nvc++' HW_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=10000 -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp
		OMP_NUM_THREADS=4 CC='gcc' EC_FLAGS='-fopenmp -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp
		OMP_NUM_THREADS=4 CC='nvc++' EC_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=10000 -O3' srun -n 1 python -m hotwheels_core.wrap src/test_target.cpp

		for ppl in "${PPL[@]}" ; do # loop over particles per leaf
		for n in "${N[@]}" ; do # loop over n of particles
		@@ -27,14 +27,14 @@ for ppl in "${PPL[@]}" ; do # loop over particles per leaf
		for threads in "${THREADS[@]}" ; do #loop over number of OpenMPThreads
		if [[ "$mode" == "cpu" ]]; then
		export CC=gcc
		export HW_FLAGS='-O3 -fopenmp'
		export EC_FLAGS='-O3 -fopenmp'
		export OMP_NUM_THREADS=$threads
		else
		if [[ $threads != 8 ]]; then
		continue
		fi
		export CC=nvc++
		export HW_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=1000 -O3'
		export EC_FLAGS='-mp=gpu -gpu=rdc,managed -gpu=cc80 -Minfo=mp,accel -Minline=1000 -O3'
		# the GPU run always uses 32 CPU cores, but this does not actually change much
		export OMP_NUM_THREADS=32
		threads=$(python -c "print(int($N//100))") #for the text hilight: "