Merge branch 'h1_optimization' into 'main' (3bdbe349) · Commits · Luca Tornatore / dADP

Makefile

+3 −1

Original line number	Diff line number	Diff line
		CC=mpicc
		CFLAGS=-O3 -g -fopenmp
		#CC=mpiicx
		#CFLAGS=-O3 -march=native -flto -funroll-loops -fopenmp
		CFLAGS=-O3 -fopenmp
		LDFLAGS=-lm

		all: main

README.md

+2 −1

Original line number	Diff line number	Diff line
		@@ -15,7 +15,8 @@ The suggestion is to run it with one mpi task per socket.

		# Todo

		- [ ] H1: implementation of lock free centers elimination
		- [ ] argument parsing: find an elegant way to pass parameters and file (maybe a config file?)
		- [~] H1: implementation of lock free centers elimination (work in progress)
		- [ ] context: open all windows in a single shot, close them all together
		- [ ] io: curation of IO using mpi IO or other solutions
		- [ ] kdtree: optimization and profiling

run_leo

+16 −5

Original line number	Diff line number	Diff line
		#!/bin/bash

		#SBATCH --nodes=6
		#SBATCH --nodes=2
		#SBATCH --ntasks-per-node=2
		#SBATCH --cpus-per-task=56
		#SBATCH --time=04:00:00
		#SBATCH --job-name=dadp_test
		#SBATCH --partition=dcgp_usr_prod
		#SBATCH --account=IscrC_dadp
		#SBATCH --account=EUHPC_D18_045
		#SBATCH --output=out_leo
		#SBATCH --error=err_leo
		#SBATCH --mem=480G
		@@ -14,8 +14,11 @@


		cd $SLURM_SUBMIT_DIR
		module restore my_gcc
		#module restore my_intel

		#module load gcc
		#module load openmpi
		module load intel-oneapi-mpi

		make clean
		make
		ulimit -s unlimited
		@@ -31,10 +34,18 @@ mkdir bb
		OUT_ASSIGNMENT=/leonardo_scratch/large/userexternal/ftomba00/assignment
		OUT_DATA=/leonardo_scratch/large/userexternal/ftomba00/data

		IN_DATA=/leonardo_work/IscrC_dadp
		IN_DATA=/leonardo_work/EUHPC_D18_045

		#10^6 points
		time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}
		#time mpirun -n ${SLURM_NTASKS} --map-by core ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}

		#34 * 10^6 points
		#time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g1212639_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}

		#88 * 10^6 points
		#time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g5503149_091_0000 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}


		#200 * 10^6 points
		#time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -i ${IN_DATA}/norm_data/std_g2980844_091_0000 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}

src/adp/adp.c

+409 −101

File changed.

Preview size limit exceeded, changes collapsed.

src/adp/adp.h

+15 −0

Original line number	Diff line number	Diff line
		@@ -45,6 +45,21 @@ typedef struct merge_t
		float_t density;
		} merge_t;

		/*
		 * One center-removal record: a rank, a source/target pair of point ids,
		 * and the density associated with the source.
		 * NOTE(review): presumably produced by the H1 centers-elimination step;
		 * its usage is not visible in this header -- confirm against adp.c.
		 */
		typedef struct center_removal_t
		{
		int rank;               /* NOTE(review): presumably an MPI rank; which side it identifies is not visible here -- confirm */
		idx_t source_id;        /* id of the source point (idx_t is declared elsewhere) */
		idx_t target_id;        /* id of the target point */
		float_t source_density; /* density value carried for the source point */
		} center_removal_t;

		/*
		 * Container of center_removal_t records: a pointer to the records plus
		 * two idx_t counters.
		 * NOTE(review): allocation, growth policy and ownership of `data` are
		 * not visible in this header -- confirm against the implementation.
		 */
		typedef struct center_removal_queue_t
		{
		center_removal_t* data; /* pointer to the stored records */
		idx_t count;            /* NOTE(review): presumably the number of records currently stored -- confirm */
		idx_t size;             /* NOTE(review): presumably the allocated capacity, in records -- confirm */
		} center_removal_queue_t;



		/*
		 * Declaration only; the definition is not visible in this view.
		 * ctx:     pointer to the global context (passed non-const).
		 * d:       NOTE(review): presumably the data dimension used by the density estimate -- confirm.
		 * verbose: NOTE(review): presumably enables diagnostic output when non-zero -- confirm.
		 */
		void compute_density_kstarnn_rma(global_context_t* ctx, const float_t d, int verbose);