Loading run_leo +9 −4 Original line number Diff line number Diff line Loading @@ -31,13 +31,18 @@ export PSM2_MQ_RECVREQS_MAX=268435456 rm bb/* mkdir bb OUT_ASSIGNMENT=/leonardo_scratch/large/userexternal/ftomba00/assignment OUT_DATA=/leonardo_scratch/large/userexternal/ftomba00/data OUT_DIR=/leonardo_scratch/large/userexternal/ftomba00/exp1M IN_DATA=/leonardo_work/EUHPC_D18_045 rm -rf ${OUT_DIR} mkdir ${OUT_DIR} #10^6 points time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -k 100 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -k 100 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -o ${OUT_DIR} ## to modify those one below #time mpirun -n ${SLURM_NTASKS} --map-by core ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #34 * 10^6 points Loading src/common/common.c +1 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ void get_context(global_context_t* ctx) ctx -> local_datapoints = NULL; ctx -> __local_heap_buffers = NULL; ctx -> input_data_in_float32 = -1; ctx -> local_tree_type = KD; ctx -> dims = 0; ctx -> k = 300; ctx -> z = 3; Loading src/common/common.h +6 −4 Original line number Diff line number Diff line Loading @@ -11,16 +11,16 @@ //#include <stdarg.h> // #define PARALLEL_FIX_BORDERS #define WRITE_SHUFFLED_DATA #define WRITE_NGBH // #define WRITE_SHUFFLED_DATA // #define WRITE_NGBH // #define WRITE_TOP_NODES #define WRITE_DENSITY // #define WRITE_DENSITY // #define WRITE_CLUSTER_ASSIGN_H1 // #define WRITE_BORDERS // #define WRITE_CENTERS_PRE_MERGING // #define WRITE_MERGES_INFO // #define WRITE_MERGING_TABLE #define WRITE_FINAL_ASSIGNMENT // #define WRITE_FINAL_ASSIGNMENT // #define PRINT_NGBH_EXCHANGE_SCHEME // #define PRINT_H2_COMM_SCHEME Loading Loading @@ -162,6 +162,7 @@ typedef struct datapoint_info_t { float_t log_rho_err; // } datapoint_info_t; enum local_tree_type {VP, KD}; typedef struct global_context_t { Loading @@ -188,6 +189,7 @@ typedef struct global_context_t idx_t* og_idxs; //original indexes heap_node_t* __local_heap_buffers; //buffer that stores nearest neighbors MPI_Comm mpi_communicator; //mpi communicator enum local_tree_type local_tree_type; //local tree strategy int input_data_in_float32; char input_data_file[DEFAULT_STR_LEN]; char output_assignment_file[DEFAULT_STR_LEN]; Loading src/main/main.c +150 −72 Original line number Diff line number Diff line Loading @@ -8,29 +8,6 @@ #include <unistd.h> #include <getopt.h> #ifdef AMONRA #pragma message "Hi, you are on amonra" #define OUT_CLUSTER_ASSIGN "/beegfs/ftomba/phd/results/final_assignment.npy" #define OUT_DATA "/beegfs/ftomba/phd/results/ordered_data.npy" #endif #ifdef LEONARDO #define OUT_CLUSTER_ASSIGN "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/final_assignment.npy" #define OUT_DATA "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/ordered_data.npy" #endif #ifdef LUMI #define OUT_CLUSTER_ASSIGN "~/scratch_dadp/out_dadp/final_assignment.npy" #define OUT_DATA "~/scratch_dadp/out_dadp/ordered_data.npy" #endif #ifndef OUT_CLUSTER_ASSIGN #define OUT_CLUSTER_ASSIGN "final_assignment.npy" #define OUT_DATA "ordered_data.npy" #endif #ifdef THREAD_FUNNELED #define THREAD_LEVEL MPI_THREAD_FUNNELED #else Loading @@ -43,10 +20,10 @@ struct option long_options[] = {"in-data" , required_argument, 0, 'i'}, {"in-dtype", required_argument, 0, 't'}, {"in-dims" , required_argument, 0, 'd'}, {"out-data", optional_argument, 0, 'o'}, {"out-assignment", optional_argument, 0, 'a'}, {"out-directory", required_argument, 0, 'o'}, {"kngbh", optional_argument, 0, 'k'}, {"z", optional_argument, 0, 'z'}, {"knn-strategy", optional_argument, 0, 's'}, {"help", optional_argument, 0, 'h'}, {0, 0, 0, 0} }; Loading @@ -58,12 +35,10 @@ const char* help = "Distributed Advanced Density Peak\n"\ " -d --in-dims (required) number of dimensions of the data file, dadp expects something\n"\ " of the form N x d where N is inferred from the lenght of the\n"\ " data file\n"\ " -o --out-data (optional) output path for the data, the datafile is shuffled between\n"\ " mpi tasks and datapoints are ordered default is `out_data` \n"\ " -a --out-assignment (optional) output path for the cluster assignment output ranges [0 ... Nc - 1]\n"\ " for core points halo points have indices [-Nc ... -1] conversion\n"\ " of idx for an halo point is cluster_idx = -halo_idx - 1, default is `out_assignment`\n"\ " -o --out-directory (required) output path for the data, cluster assignment and original indices \n"\ " of the datapoints. Produces files like `data.[rank]`, `og_idx.[rank]` and `assignment.[rank]`\n"\ " -k --kngbh (optional) number of nearest neighbors to compute\n"\ " -s --knn-strategy (optional) strategy for local knn (allowed values 'kd' for kdtree, 'vp' for ball-tree (vantage point)\n"\ " -z --z (optional) number of nearest neighbors to compute\n"; void parse_args(global_context_t* ctx, int argc, char** argv) Loading @@ -72,10 +47,8 @@ void parse_args(global_context_t* ctx, int argc, char** argv) int opt; int input_file_set = 0; int input_type_set = 0; snprintf(ctx -> output_assignment_file, DEFAULT_STR_LEN, "%s", OUT_CLUSTER_ASSIGN); snprintf(ctx -> output_data_file, DEFAULT_STR_LEN, "%s", OUT_DATA); while((opt = getopt_long(argc, argv, "i:t:d:o:a:k:z:h", long_options, NULL)) != -1) while((opt = getopt_long(argc, argv, "i:t:d:o:k:z:hs:", long_options, NULL)) != -1) { switch(opt) { Loading Loading @@ -105,9 +78,6 @@ void parse_args(global_context_t* ctx, int argc, char** argv) case 'o': strncpy(ctx -> output_data_file, optarg, DEFAULT_STR_LEN); break; case 'a': strncpy(ctx -> output_assignment_file, optarg, DEFAULT_STR_LEN); break; case 'k': ctx -> k = atoi(optarg); break; Loading @@ -119,6 +89,22 @@ void parse_args(global_context_t* ctx, int argc, char** argv) MPI_Finalize(); exit(0); case 's': if(strncmp("vp", optarg, 2) == 0) { ctx -> local_tree_type = VP; } else if(strncmp("kd", optarg, 2) == 0) { ctx -> local_tree_type = KD; } else { printf("Cannot find valid local tree type selection, exiting\n"); exit(0); } break; default: mpi_printf(ctx, "%s\n", help); MPI_Finalize(); Loading Loading @@ -161,8 +147,7 @@ void print_hello(global_context_t* ctx) mpi_printf(ctx, "Data file .............> %s\n", ctx -> input_data_file); mpi_printf(ctx, "Input Type .............> float%d\n", ctx -> input_data_in_float32 ? 32 : 64); mpi_printf(ctx, "Dimensions .............> %d\n", ctx -> dims); mpi_printf(ctx, "Output data file .......> %s\n", ctx -> output_data_file); mpi_printf(ctx, "Output assignment file -> %s\n", ctx -> output_assignment_file); mpi_printf(ctx, "Output directory .......> %s\n", ctx -> output_data_file); mpi_printf(ctx, "k ......................> %lu\n", ctx -> k); mpi_printf(ctx, "Z ......................> %.2lf\n", ctx -> z); mpi_printf(ctx, "\nRUNNING!\n"); Loading Loading @@ -255,15 +240,33 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) int halo = MY_TRUE; float_t tol = 0.002; if(I_AM_MASTER && ctx -> world_size <= 6) if(ctx -> world_size <= 6) { if(I_AM_MASTER) { test_file_path(ctx -> output_data_file); test_file_path(ctx -> output_assignment_file); char out_file[200]; snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); test_file_path(out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); test_file_path(out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); test_file_path(out_file); } } else { test_distributed_file_path(ctx, ctx -> output_data_file); test_distributed_file_path(ctx, ctx -> output_assignment_file); char out_file[200]; snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); test_distributed_file_path(ctx, out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); test_distributed_file_path(ctx, out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); test_distributed_file_path(ctx, out_file); } Loading Loading @@ -358,10 +361,6 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) elapsed_time = TIME_STOP; LOG_WRITE("Top kdtree build and domain decomposition", elapsed_time); TIME_START; kdtree_t local_tree; kdtree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); datapoint_info_t* dp_info = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t)); for(uint64_t i = 0; i < ctx -> local_n_points; ++i) { Loading @@ -378,22 +377,64 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) } ctx -> local_datapoints = dp_info; TIME_START; void* opaque_local_tree; switch(ctx -> local_tree_type) { case KD: { kdtree_t local_tree; kdtree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); build_tree_kdtree_parallel(&local_tree); // build_tree_kdtree_sample(&local_tree); ctx -> local_data = local_tree.data; elapsed_time = TIME_STOP; LOG_WRITE("Local trees init and build", elapsed_time); LOG_WRITE("Local kdtrees init and build", elapsed_time); TIME_START; MPI_DB_PRINT("----- Performing ngbh search -----\n"); MPI_Barrier(ctx -> mpi_communicator); mpi_all_knn_search_kdtree(ctx, dp_info, &tree, &local_tree, ctx -> k); MPI_Barrier(ctx -> mpi_communicator); elapsed_time = TIME_STOP; LOG_WRITE("Total time for all knn search w. local kdtrees", elapsed_time); opaque_local_tree = &local_tree; } break; case VP: { vptree_t local_tree; vptree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); build_tree_vptree(&local_tree); // build_tree_kdtree_sample(&local_tree); ctx -> local_data = local_tree.data; elapsed_time = TIME_STOP; LOG_WRITE("Vptrees init and build", elapsed_time); TIME_START; MPI_DB_PRINT("----- Performing ngbh search -----\n"); MPI_Barrier(ctx -> mpi_communicator); mpi_all_knn_search(ctx, dp_info, &tree, &local_tree, ctx -> k); mpi_all_knn_search_vptree(ctx, dp_info, &tree, &local_tree, ctx -> k); MPI_Barrier(ctx -> mpi_communicator); elapsed_time = TIME_STOP; LOG_WRITE("Total time for all knn search", elapsed_time) LOG_WRITE("Total time for all knn search w. vptrees", elapsed_time); opaque_local_tree = &local_tree; } break; } TIME_START; Loading Loading @@ -434,40 +475,77 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) TIME_START; int* cl = (int*)MY_MALLOC(ctx -> local_n_points * sizeof(int)); int* cl = (int*)MY_MALLOC(2 * ctx -> local_n_points * sizeof(int)); float_t* data_to_write = (float_t*)MY_MALLOC(ctx -> local_n_points * ctx -> k * sizeof(float_t)); switch(ctx -> local_tree_type) { case KD: { kdtree_t local_tree = *((kdtree_t*)opaque_local_tree); for(int i = 0; i < ctx -> local_n_points; ++i) { cl[i] = ctx -> local_datapoints[i].cluster_idx; idx_t idx = local_tree.__points[i].array_idx; memcpy(data_to_write + idx*ctx -> dims, local_tree.__points[i].data, ctx -> dims*sizeof(float_t)); } kdtree_free(&local_tree); } break; case VP: { vptree_t local_tree = *((vptree_t*)opaque_local_tree); for(int i = 0; i < ctx -> local_n_points; ++i) { cl[i] = ctx -> local_datapoints[i].cluster_idx; idx_t idx = local_tree.__points[i].array_idx; memcpy(data_to_write + idx*ctx -> dims, local_tree.__points[i].data, ctx -> dims*sizeof(float_t)); } vptree_free(&local_tree); } break; default: break; } if(ctx -> world_size <= 32) { big_ordered_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, ctx -> output_assignment_file); big_ordered_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, ctx -> output_data_file); char out_file[200]; free(cl); snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); big_ordered_buffer_to_file(ctx, ctx->og_idxs, sizeof(idx_t), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); big_ordered_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); big_ordered_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, out_file); } else { distributed_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, ctx -> output_assignment_file); distributed_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, ctx -> output_data_file); char out_file[200]; free(cl); snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); distributed_buffer_to_file(ctx, ctx->og_idxs, sizeof(idx_t), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); distributed_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); distributed_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, out_file); } elapsed_time = TIME_STOP; LOG_WRITE("Write results to file", elapsed_time); free(cl); free(data_to_write); top_tree_free(ctx, &tree); kdtree_free(&local_tree); //clusters_free(&clusters); free(send_counts); Loading src/tree/kdtree.h +10 −1 Original line number Diff line number Diff line Loading @@ -10,7 +10,7 @@ #include <time.h> #include <float.h> #define DEFAULT_LEAF_SIZE 32 #define DEFAULT_LEAF_SIZE 256 #define ALIGNMENT 64 #define CHECK_ALLOCATION_NO_CTX(x) if(!x){printf("[!!!] Failed allocation: %s at line %d \n", __FILE__, __LINE__ ); exit(1);} Loading Loading @@ -49,6 +49,15 @@ #define MAX(x,y) ((x > y) ? (x) : (y)) #endif #ifndef POINT #define POINT typedef struct point_t { idx_t array_idx; float_t* data; } point_t; #endif typedef struct { float_t* lb; Loading Loading
run_leo +9 −4 Original line number Diff line number Diff line Loading @@ -31,13 +31,18 @@ export PSM2_MQ_RECVREQS_MAX=268435456 rm bb/* mkdir bb OUT_ASSIGNMENT=/leonardo_scratch/large/userexternal/ftomba00/assignment OUT_DATA=/leonardo_scratch/large/userexternal/ftomba00/data OUT_DIR=/leonardo_scratch/large/userexternal/ftomba00/exp1M IN_DATA=/leonardo_work/EUHPC_D18_045 rm -rf ${OUT_DIR} mkdir ${OUT_DIR} #10^6 points time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -k 100 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK} ./main -t f32 -k 100 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -o ${OUT_DIR} ## to modify those one below #time mpirun -n ${SLURM_NTASKS} --map-by core ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA} #34 * 10^6 points Loading
src/common/common.c +1 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ void get_context(global_context_t* ctx) ctx -> local_datapoints = NULL; ctx -> __local_heap_buffers = NULL; ctx -> input_data_in_float32 = -1; ctx -> local_tree_type = KD; ctx -> dims = 0; ctx -> k = 300; ctx -> z = 3; Loading
src/common/common.h +6 −4 Original line number Diff line number Diff line Loading @@ -11,16 +11,16 @@ //#include <stdarg.h> // #define PARALLEL_FIX_BORDERS #define WRITE_SHUFFLED_DATA #define WRITE_NGBH // #define WRITE_SHUFFLED_DATA // #define WRITE_NGBH // #define WRITE_TOP_NODES #define WRITE_DENSITY // #define WRITE_DENSITY // #define WRITE_CLUSTER_ASSIGN_H1 // #define WRITE_BORDERS // #define WRITE_CENTERS_PRE_MERGING // #define WRITE_MERGES_INFO // #define WRITE_MERGING_TABLE #define WRITE_FINAL_ASSIGNMENT // #define WRITE_FINAL_ASSIGNMENT // #define PRINT_NGBH_EXCHANGE_SCHEME // #define PRINT_H2_COMM_SCHEME Loading Loading @@ -162,6 +162,7 @@ typedef struct datapoint_info_t { float_t log_rho_err; // } datapoint_info_t; enum local_tree_type {VP, KD}; typedef struct global_context_t { Loading @@ -188,6 +189,7 @@ typedef struct global_context_t idx_t* og_idxs; //original indexes heap_node_t* __local_heap_buffers; //buffer that stores nearest neighbors MPI_Comm mpi_communicator; //mpi communicator enum local_tree_type local_tree_type; //local tree strategy int input_data_in_float32; char input_data_file[DEFAULT_STR_LEN]; char output_assignment_file[DEFAULT_STR_LEN]; Loading
src/main/main.c +150 −72 Original line number Diff line number Diff line Loading @@ -8,29 +8,6 @@ #include <unistd.h> #include <getopt.h> #ifdef AMONRA #pragma message "Hi, you are on amonra" #define OUT_CLUSTER_ASSIGN "/beegfs/ftomba/phd/results/final_assignment.npy" #define OUT_DATA "/beegfs/ftomba/phd/results/ordered_data.npy" #endif #ifdef LEONARDO #define OUT_CLUSTER_ASSIGN "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/final_assignment.npy" #define OUT_DATA "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/ordered_data.npy" #endif #ifdef LUMI #define OUT_CLUSTER_ASSIGN "~/scratch_dadp/out_dadp/final_assignment.npy" #define OUT_DATA "~/scratch_dadp/out_dadp/ordered_data.npy" #endif #ifndef OUT_CLUSTER_ASSIGN #define OUT_CLUSTER_ASSIGN "final_assignment.npy" #define OUT_DATA "ordered_data.npy" #endif #ifdef THREAD_FUNNELED #define THREAD_LEVEL MPI_THREAD_FUNNELED #else Loading @@ -43,10 +20,10 @@ struct option long_options[] = {"in-data" , required_argument, 0, 'i'}, {"in-dtype", required_argument, 0, 't'}, {"in-dims" , required_argument, 0, 'd'}, {"out-data", optional_argument, 0, 'o'}, {"out-assignment", optional_argument, 0, 'a'}, {"out-directory", required_argument, 0, 'o'}, {"kngbh", optional_argument, 0, 'k'}, {"z", optional_argument, 0, 'z'}, {"knn-strategy", optional_argument, 0, 's'}, {"help", optional_argument, 0, 'h'}, {0, 0, 0, 0} }; Loading @@ -58,12 +35,10 @@ const char* help = "Distributed Advanced Density Peak\n"\ " -d --in-dims (required) number of dimensions of the data file, dadp expects something\n"\ " of the form N x d where N is inferred from the lenght of the\n"\ " data file\n"\ " -o --out-data (optional) output path for the data, the datafile is shuffled between\n"\ " mpi tasks and datapoints are ordered default is `out_data` \n"\ " -a --out-assignment (optional) output path for the cluster assignment output ranges [0 ... Nc - 1]\n"\ " for core points halo points have indices [-Nc ... -1] conversion\n"\ " of idx for an halo point is cluster_idx = -halo_idx - 1, default is `out_assignment`\n"\ " -o --out-directory (required) output path for the data, cluster assignment and original indices \n"\ " of the datapoints. Produces files like `data.[rank]`, `og_idx.[rank]` and `assignment.[rank]`\n"\ " -k --kngbh (optional) number of nearest neighbors to compute\n"\ " -s --knn-strategy (optional) strategy for local knn (allowed values 'kd' for kdtree, 'vp' for ball-tree (vantage point)\n"\ " -z --z (optional) number of nearest neighbors to compute\n"; void parse_args(global_context_t* ctx, int argc, char** argv) Loading @@ -72,10 +47,8 @@ void parse_args(global_context_t* ctx, int argc, char** argv) int opt; int input_file_set = 0; int input_type_set = 0; snprintf(ctx -> output_assignment_file, DEFAULT_STR_LEN, "%s", OUT_CLUSTER_ASSIGN); snprintf(ctx -> output_data_file, DEFAULT_STR_LEN, "%s", OUT_DATA); while((opt = getopt_long(argc, argv, "i:t:d:o:a:k:z:h", long_options, NULL)) != -1) while((opt = getopt_long(argc, argv, "i:t:d:o:k:z:hs:", long_options, NULL)) != -1) { switch(opt) { Loading Loading @@ -105,9 +78,6 @@ void parse_args(global_context_t* ctx, int argc, char** argv) case 'o': strncpy(ctx -> output_data_file, optarg, DEFAULT_STR_LEN); break; case 'a': strncpy(ctx -> output_assignment_file, optarg, DEFAULT_STR_LEN); break; case 'k': ctx -> k = atoi(optarg); break; Loading @@ -119,6 +89,22 @@ void parse_args(global_context_t* ctx, int argc, char** argv) MPI_Finalize(); exit(0); case 's': if(strncmp("vp", optarg, 2) == 0) { ctx -> local_tree_type = VP; } else if(strncmp("kd", optarg, 2) == 0) { ctx -> local_tree_type = KD; } else { printf("Cannot find valid local tree type selection, exiting\n"); exit(0); } break; default: mpi_printf(ctx, "%s\n", help); MPI_Finalize(); Loading Loading @@ -161,8 +147,7 @@ void print_hello(global_context_t* ctx) mpi_printf(ctx, "Data file .............> %s\n", ctx -> input_data_file); mpi_printf(ctx, "Input Type .............> float%d\n", ctx -> input_data_in_float32 ? 32 : 64); mpi_printf(ctx, "Dimensions .............> %d\n", ctx -> dims); mpi_printf(ctx, "Output data file .......> %s\n", ctx -> output_data_file); mpi_printf(ctx, "Output assignment file -> %s\n", ctx -> output_assignment_file); mpi_printf(ctx, "Output directory .......> %s\n", ctx -> output_data_file); mpi_printf(ctx, "k ......................> %lu\n", ctx -> k); mpi_printf(ctx, "Z ......................> %.2lf\n", ctx -> z); mpi_printf(ctx, "\nRUNNING!\n"); Loading Loading @@ -255,15 +240,33 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) int halo = MY_TRUE; float_t tol = 0.002; if(I_AM_MASTER && ctx -> world_size <= 6) if(ctx -> world_size <= 6) { if(I_AM_MASTER) { test_file_path(ctx -> output_data_file); test_file_path(ctx -> output_assignment_file); char out_file[200]; snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); test_file_path(out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); test_file_path(out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); test_file_path(out_file); } } else { test_distributed_file_path(ctx, ctx -> output_data_file); test_distributed_file_path(ctx, ctx -> output_assignment_file); char out_file[200]; snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); test_distributed_file_path(ctx, out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); test_distributed_file_path(ctx, out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); test_distributed_file_path(ctx, out_file); } Loading Loading @@ -358,10 +361,6 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) elapsed_time = TIME_STOP; LOG_WRITE("Top kdtree build and domain decomposition", elapsed_time); TIME_START; kdtree_t local_tree; kdtree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); datapoint_info_t* dp_info = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t)); for(uint64_t i = 0; i < ctx -> local_n_points; ++i) { Loading @@ -378,22 +377,64 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) } ctx -> local_datapoints = dp_info; TIME_START; void* opaque_local_tree; switch(ctx -> local_tree_type) { case KD: { kdtree_t local_tree; kdtree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); build_tree_kdtree_parallel(&local_tree); // build_tree_kdtree_sample(&local_tree); ctx -> local_data = local_tree.data; elapsed_time = TIME_STOP; LOG_WRITE("Local trees init and build", elapsed_time); LOG_WRITE("Local kdtrees init and build", elapsed_time); TIME_START; MPI_DB_PRINT("----- Performing ngbh search -----\n"); MPI_Barrier(ctx -> mpi_communicator); mpi_all_knn_search_kdtree(ctx, dp_info, &tree, &local_tree, ctx -> k); MPI_Barrier(ctx -> mpi_communicator); elapsed_time = TIME_STOP; LOG_WRITE("Total time for all knn search w. local kdtrees", elapsed_time); opaque_local_tree = &local_tree; } break; case VP: { vptree_t local_tree; vptree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims); build_tree_vptree(&local_tree); // build_tree_kdtree_sample(&local_tree); ctx -> local_data = local_tree.data; elapsed_time = TIME_STOP; LOG_WRITE("Vptrees init and build", elapsed_time); TIME_START; MPI_DB_PRINT("----- Performing ngbh search -----\n"); MPI_Barrier(ctx -> mpi_communicator); mpi_all_knn_search(ctx, dp_info, &tree, &local_tree, ctx -> k); mpi_all_knn_search_vptree(ctx, dp_info, &tree, &local_tree, ctx -> k); MPI_Barrier(ctx -> mpi_communicator); elapsed_time = TIME_STOP; LOG_WRITE("Total time for all knn search", elapsed_time) LOG_WRITE("Total time for all knn search w. vptrees", elapsed_time); opaque_local_tree = &local_tree; } break; } TIME_START; Loading Loading @@ -434,40 +475,77 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) TIME_START; int* cl = (int*)MY_MALLOC(ctx -> local_n_points * sizeof(int)); int* cl = (int*)MY_MALLOC(2 * ctx -> local_n_points * sizeof(int)); float_t* data_to_write = (float_t*)MY_MALLOC(ctx -> local_n_points * ctx -> k * sizeof(float_t)); switch(ctx -> local_tree_type) { case KD: { kdtree_t local_tree = *((kdtree_t*)opaque_local_tree); for(int i = 0; i < ctx -> local_n_points; ++i) { cl[i] = ctx -> local_datapoints[i].cluster_idx; idx_t idx = local_tree.__points[i].array_idx; memcpy(data_to_write + idx*ctx -> dims, local_tree.__points[i].data, ctx -> dims*sizeof(float_t)); } kdtree_free(&local_tree); } break; case VP: { vptree_t local_tree = *((vptree_t*)opaque_local_tree); for(int i = 0; i < ctx -> local_n_points; ++i) { cl[i] = ctx -> local_datapoints[i].cluster_idx; idx_t idx = local_tree.__points[i].array_idx; memcpy(data_to_write + idx*ctx -> dims, local_tree.__points[i].data, ctx -> dims*sizeof(float_t)); } vptree_free(&local_tree); } break; default: break; } if(ctx -> world_size <= 32) { big_ordered_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, ctx -> output_assignment_file); big_ordered_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, ctx -> output_data_file); char out_file[200]; free(cl); snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); big_ordered_buffer_to_file(ctx, ctx->og_idxs, sizeof(idx_t), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); big_ordered_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); big_ordered_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, out_file); } else { distributed_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, ctx -> output_assignment_file); distributed_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, ctx -> output_data_file); char out_file[200]; free(cl); snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file); distributed_buffer_to_file(ctx, ctx->og_idxs, sizeof(idx_t), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/assignment", ctx->output_data_file); distributed_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, out_file); snprintf(out_file, 200, "%s/data", ctx->output_data_file); distributed_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, out_file); } elapsed_time = TIME_STOP; LOG_WRITE("Write results to file", elapsed_time); free(cl); free(data_to_write); top_tree_free(ctx, &tree); kdtree_free(&local_tree); //clusters_free(&clusters); free(send_counts); Loading
src/tree/kdtree.h +10 −1 Original line number Diff line number Diff line Loading @@ -10,7 +10,7 @@ #include <time.h> #include <float.h> #define DEFAULT_LEAF_SIZE 32 #define DEFAULT_LEAF_SIZE 256 #define ALIGNMENT 64 #define CHECK_ALLOCATION_NO_CTX(x) if(!x){printf("[!!!] Failed allocation: %s at line %d \n", __FILE__, __LINE__ ); exit(1);} Loading Loading @@ -49,6 +49,15 @@ #define MAX(x,y) ((x > y) ? (x) : (y)) #endif #ifndef POINT #define POINT typedef struct point_t { idx_t array_idx; float_t* data; } point_t; #endif typedef struct { float_t* lb; Loading