Loading Makefile +3 −1 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O3 -g -fopenmp #CC=mpiicx #CFLAGS=-O3 -march=native -flto -funroll-loops -fopenmp CFLAGS=-O3 -fopenmp LDFLAGS=-lm all: main Loading README.md +2 −1 Original line number Diff line number Diff line Loading @@ -15,7 +15,8 @@ The suggestion is to run it with one mpi task per socket. # Todo - [ ] H1: implementation of lock free centers elimination - [ ] argument parsing: find an elegant way to pass parameters and file (maybe a config file?) - [~] H1: implementation of lock free centers elimination (*work in progress*) - [ ] context: open all windows in a single shot, close them all togheter - [ ] io: curation of IO using mpi IO or other solutions - [ ] kdtree: optimization an profiling Loading run_leo +16 −5 Original line number Diff line number Diff line #!/bin/bash #SBATCH --nodes=6 #SBATCH --nodes=2 #SBATCH --ntasks-per-node=2 #SBATCH --cpus-per-task=56 #SBATCH --time=04:00:00 #SBATCH --job-name=dadp_test #SBATCH --partition=dcgp_usr_prod #SBATCH --account=IscrC_dadp #SBATCH --account=EUHPC_D18_045 #SBATCH --output=out_leo #SBATCH --error=err_leo #SBATCH --mem=480G Loading @@ -14,8 +14,11 @@ cd $SLURM_SUBMIT_DIR module restore my_gcc #module restore my_intel #module load gcc #module load openmpi module load intel-oneapi-mpi make clean make ulimit -s unlimited Loading @@ -31,10 +34,18 @@ mkdir bb OUT_ASSIGNMENT=/leonardo_scratch/large/userexternal/ftomba00/assignment OUT_DATA=/leonardo_scratch/large/userexternal/ftomba00/data IN_DATA=/leonardo_work/IscrC_dadp IN_DATA=/leonardo_work/EUHPC_D18_045 #10^6 points time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #time mpirun -n ${SLURM_NTASKS} --map-by core ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #34 * 10^6 points #time mpirun -n ${SLURM_NTASKS} --map-by 
ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g1212639_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #88 * 10^6 points #time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g5503149_091_0000 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #200 * 10^6 points #time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g2980844_091_0000 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} src/adp/adp.c +409 −101 File changed.Preview size limit exceeded, changes collapsed. Show changes src/adp/adp.h +15 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,21 @@ typedef struct merge_t float_t density; } merge_t; typedef struct center_removal_t { int rank; idx_t source_id; idx_t target_id; float_t source_density; } center_removal_t; typedef struct center_removal_queue_t { center_removal_t* data; idx_t count; idx_t size; } center_removal_queue_t; void compute_density_kstarnn_rma(global_context_t* ctx, const float_t d, int verbose); Loading Loading
Makefile +3 −1 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O3 -g -fopenmp #CC=mpiicx #CFLAGS=-O3 -march=native -flto -funroll-loops -fopenmp CFLAGS=-O3 -fopenmp LDFLAGS=-lm all: main Loading
README.md +2 −1 Original line number Diff line number Diff line Loading @@ -15,7 +15,8 @@ The suggestion is to run it with one mpi task per socket. # Todo - [ ] H1: implementation of lock free centers elimination - [ ] argument parsing: find an elegant way to pass parameters and file (maybe a config file?) - [~] H1: implementation of lock free centers elimination (*work in progress*) - [ ] context: open all windows in a single shot, close them all together - [ ] io: curation of IO using mpi IO or other solutions - [ ] kdtree: optimization and profiling Loading
run_leo +16 −5 Original line number Diff line number Diff line #!/bin/bash #SBATCH --nodes=6 #SBATCH --nodes=2 #SBATCH --ntasks-per-node=2 #SBATCH --cpus-per-task=56 #SBATCH --time=04:00:00 #SBATCH --job-name=dadp_test #SBATCH --partition=dcgp_usr_prod #SBATCH --account=IscrC_dadp #SBATCH --account=EUHPC_D18_045 #SBATCH --output=out_leo #SBATCH --error=err_leo #SBATCH --mem=480G Loading @@ -14,8 +14,11 @@ cd $SLURM_SUBMIT_DIR module restore my_gcc #module restore my_intel #module load gcc #module load openmpi module load intel-oneapi-mpi make clean make ulimit -s unlimited Loading @@ -31,10 +34,18 @@ mkdir bb OUT_ASSIGNMENT=/leonardo_scratch/large/userexternal/ftomba00/assignment OUT_DATA=/leonardo_scratch/large/userexternal/ftomba00/data IN_DATA=/leonardo_work/IscrC_dadp IN_DATA=/leonardo_work/EUHPC_D18_045 #10^6 points time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #time mpirun -n ${SLURM_NTASKS} --map-by core ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #34 * 10^6 points #time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g1212639_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #88 * 10^6 points #time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g5503149_091_0000 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #200 * 10^6 points #time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g2980844_091_0000 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}
src/adp/adp.h +15 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,21 @@ typedef struct merge_t float_t density; } merge_t; typedef struct center_removal_t { int rank; idx_t source_id; idx_t target_id; float_t source_density; } center_removal_t; typedef struct center_removal_queue_t { center_removal_t* data; idx_t count; idx_t size; } center_removal_queue_t; void compute_density_kstarnn_rma(global_context_t* ctx, const float_t d, int verbose); Loading