Commit d7aacba1 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

updated script

parent a7e894a2
Loading
Loading
Loading
Loading
+91 −55
Original line number Diff line number Diff line
#!/bin/bash
#SBATCH -A clusters
#SBATCH -p pleiadi-gpu
#SBATCH -J LOFAR
#SBATCH -A IscrC_RICK
#SBATCH -p boost_usr_prod
##SBATCH --qos boost_qos_bprod
#SBATCH -J RICK
### number of nodes
#SBATCH -N 4
#SBATCH -N 1
### number of hyperthreading threads
#SBATCH --ntasks-per-core=1
### number of MPI tasks per node
#SBATCH --ntasks-per-node=36
#SBATCH -n 144
#SBATCH --ntasks-per-node=4
#SBATCH -n 4
### number of openmp threads
#SBATCH --cpus-per-task=1
#SBATCH --cpus-per-task=8
### number of allocated GPUs per node
##SBATCH --gres=gpu:2
#SBATCH --mem=110G
#SBATCH --gpus-per-node=4
#SBATCH --mem=450G
#SBATCH -o test.out
#SBATCH -e test.err
#SBATCH -t 08:00:00 
#SBATCH -t 03:00:00 


module purge
module load openmpi/
module load fftw/
module load nvhpc/

echo $SLURM_NODELIST
export MODULE_VERSION=5.0.1
source /opt/cluster/spack/share/spack/setup-env.sh
module load default-gcc-11.2.0
export PATH=/opt/cluster/spack/nvidia/gcc/11.2.0/bin:$PATH
export LD_LIBRARY_PATH=/opt/cluster/spack/nvidia/gcc/11.2.0/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/leonardo/prod/spack/03/install/0.19/linux-rhel8-icelake/gcc-8.5.0/nvhpc-23.1-x5lw6edfmfuot2ipna3wseallzl4oolm/Linux_x86_64/23.1/comm_libs/11.8/nccl/lib:$LD_LIBRARY_PATH

export OMPI_CC=gcc
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}

cd /u/glacopo/LOFAR/hpc_imaging-end-of-re_structuring/
make SYSTYPE=Amonra_gpu -j1 clean
rm -f w-stackingCfftw_ring
make SYSTYPE=Amonra_gpu -j1 mpi_ring_omp
cd ../
make -j1 clean
rm -f w-stacking_fftw_acc-omp_acc-reduce
make -j1 w-stacking

export use_cuda=no

if [ "$use_cuda" = "no" ]
then
  export typestring=omp_cpu
  export exe=w-stackingCfftw_ring_omp
fi
OUT_SHM=result_${SLURM_NTASKS}_${typestring}_${SLURM_CPUS_PER_TASK}
OUT_SHM_RES=/leonardo_scratch/large/userexternal/glacopo0/hpc_imaging/scripts/Tests/times_${SLURM_NTASKS}_${typestring}_${SLURM_CPUS_PER_TASK}

if [ "$use_cuda" = "yes" ]
then
  export typestring=cuda
  export exe=w-stackingfftw
fi
rm -f ${OUT_SHM} ${OUT_SHM_RES}

Tmin=2
Tmax=32
export typestring=omp_gpu
export exe=w-stacking_fftw_acc-omp_acc-reduce

for (( t=$Tmin; t<=$Tmax; t*=2 ))
do
    N=$(( 4*${t} ))
    echo -e "\tRunning $t tasks per node\n"
    export logdir=mpi_${N}_${typestring}_${SLURM_CPUS_PER_TASK}
export logdir=mpi_${SLURM_NTASKS}_${typestring}_${SLURM_CPUS_PER_TASK}
echo "Creating $logdir"
rm -fr $logdir
mkdir $logdir

    for itest in {1..3}
for itest in {1..10}
do
  export logfile=test_${itest}_${logdir}.log
	echo "time mpirun -np ${N} --map-by ppr:${t}:node /u/glacopo/LOFAR/hpc_imaging-end-of-re_structuring/${exe} /u/glacopo/LOFAR/hpc_imaging-end-of-re_structuring/data/paramfile.txt" > $logfile
	time mpirun -np ${N} --map-by ppr:${t}:node --bind-to core --mca btl self,vader /u/glacopo/LOFAR/hpc_imaging-end-of-re_structuring/${exe} /u/glacopo/LOFAR/hpc_imaging-end-of-re_structuring/data/paramfile.txt >> $logfile
  echo "time mpirun -np ${SLURM_NTASKS} --map-by ppr:4:node /leonardo_scratch/large/userexternal/glacopo0/hpc_imaging/${exe} /leonardo_scratch/large/userexternal/glacopo0/hpc_imaging/data/paramfile.txt" > $logfile
  time mpirun -np ${SLURM_NTASKS} --map-by ppr:4:node --bind-to core --mca btl self,vader /leonardo_scratch/large/userexternal/glacopo0/hpc_imaging/${exe} data/paramfile.txt >> $logfile
  mv $logfile $logdir
  mv timings.dat ${logdir}/timings_${itest}.dat
  cat ${logdir}/timings_all.dat ${logdir}/timings_${itest}.dat >> ${logdir}/timings_all.dat

  Reduce_time=$( grep -w 'Reduce time :' $logdir/$logfile | gawk '{print $4}' )
  FFTW_time=$( grep -w 'FFTW time :' $logdir/$logfile | gawk '{print $4}' )
  Composition_time=$( grep -w 'Array Composition time :' $logdir/$logfile | gawk '{print $5}' )
  Writing_time=$( grep -w ' Image writing time :' $logdir/$logfile | gawk '{print $5}' )
  Total_time=$( grep -w 'TOT time :' $logdir/$logfile | gawk '{print $4}' )

  #Not relevant for the paper
  Setup_time=$( grep -w 'Setup time:' $logdir/$logfile | gawk '{print $3}' )
  Kernel_time=$( grep -w 'Kernel time :' $logdir/$logfile | gawk '{print $4}' )
  Phase_time=$( grep -w 'Phase time :' $logdir/$logfile | gawk '{print $4}' )
  ##########################

  
  echo $itest $Reduce_time $FFTW_time $Composition_time $Writing_time $Total_time $Setup_time $Kernel_time $Phase_time >> ${OUT_SHM}
done
done

echo -e "\n\n" >> ${OUT_SHM}
avg_red=$( awk '{sum+=$2} END { print sum/10 }' ${OUT_SHM} )
avg_fftw=$( awk '{sum+=$3} END { print sum/10 }' ${OUT_SHM} )
avg_comp=$( awk '{sum+=$4} END { print sum/10 }' ${OUT_SHM} )
avg_write=$( awk '{sum+=$5} END { print sum/10 }' ${OUT_SHM} )
avg_tot=$( awk '{sum+=$6} END { print sum/10 }' ${OUT_SHM} )

std_red=$( awk '{if($2!=""){count++;sum+=$2};y+=$2^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )
std_fftw=$( awk '{if($3!=""){count++;sum+=$3};y+=$3^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )
std_comp=$( awk '{if($4!=""){count++;sum+=$4};y+=$4^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )
std_write=$( awk '{if($5!=""){count++;sum+=$5};y+=$5^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )
std_tot=$( awk '{if($6!=""){count++;sum+=$6};y+=$6^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )


#Not relevant for the paper
avg_setup=$( awk '{sum+=$7} END { print sum/10 }' ${OUT_SHM} )
avg_ker=$( awk '{sum+=$8} END { print sum/10 }' ${OUT_SHM} )
avg_phase=$( awk '{sum+=$9} END { print sum/10 }' ${OUT_SHM} )

std_setup=$( awk '{if($7!=""){count++;sum+=$7};y+=$7^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )
std_ker=$( awk '{if($8!=""){count++;sum+=$8};y+=$8^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )
std_phase=$( awk '{if($9!=""){count++;sum+=$9};y+=$9^2} END{sq=sqrt(y/10-(sum/10)^2);sq=sq?sq:0;print sq}' ${OUT_SHM} )
##########################


echo "Averages and standard deviations over 10 shots" >> ${OUT_SHM_RES}
echo -e "\n" ${OUT_SHM_RES}
echo "${SLURM_NTASKS} MPI tasks;  ${SLURM_CPUS_PER_TASK} OpenMP threads per task;  ${SLURM_GPUS_PER_NODE} GPUs per node;" >> ${OUT_SHM_RES}
echo -e "\n\n" ${OUT_SHM_RES}
echo $avg_red $std_red $avg_fftw $std_fftw $avg_comp $std_comp $avg_write $std_write $avg_tot $std_tot >> ${OUT_SHM_RES}
echo -e "\n\n" ${OUT_SHM_RES}
echo $avg_setup $std_setup $avg_ker $std_ker $avg_phase $std_phase >> ${OUT_SHM_RES} 
rm -f ${OUT_SHM}