Loading Makefile +1 −1 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O3 -g -fopenmp CFLAGS=-O3 -march=native -flto -funroll-loops -fopenmp LDFLAGS=-lm all: main Loading src/adp/adp.c +135 −10 Original line number Diff line number Diff line Loading @@ -733,7 +733,7 @@ clusters_t Heuristic1(global_context_t *ctx) } } /* /** * optimized version * * Generate a mask that keeps track of the point has been eliminating the Loading @@ -741,7 +741,7 @@ clusters_t Heuristic1(global_context_t *ctx) * ends, center, removed centers, and max_rho arrays are populated * * optimized v2 use a queue of center removal and then exchange them */ **/ heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node)); for(idx_t p = 0; p < n; ++p) Loading Loading @@ -779,10 +779,118 @@ clusters_t Heuristic1(global_context_t *ctx) TIME_START; //#define EXP_CENTER_PRUNING #if defined(EXP_CENTER_PRUNING) int all_have_finished = 0; int i_have_finished = 0; datapoint_info_t* tmp_datapoints = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t)); // use strategy similar to the one employed in density estimation // vertical loop, first on k the on n // request points // then process the points MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); //MPI_Win_lock_all(0, win_datapoints); MPI_DB_PRINT("Using vertical loop on centers pruning\n"); int kmax = ctx -> k; for(idx_t j = 1; j < kmax; ++j) { i_have_finished = 1; #if !defined(THREAD_FUNNELED) // request points #pragma omp parallel for schedule(dynamic) for(idx_t p = 0; p < n; ++p) { datapoint_info_t i_point = *(dp_info_ptrs[p]); if(j < i_point.kstar + 1) { #pragma omp atomic write i_have_finished = 0; idx_t jidx = i_point.ngbh[j].array_idx; int owner = foreign_owner(ctx, jidx); /* find if datapoint is halo or not */ idx_t jpos = jidx - ctx -> rank_idx_start[owner]; if(owner == ctx -> mpi_rank) { tmp_datapoints[p] = ctx -> local_datapoints[jpos]; } else { MPI_Get(tmp_datapoints + p, sizeof(datapoint_info_t), MPI_BYTE, owner, jpos * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints); } } } //MPI_Win_flush_all(win_datapoints); MPI_Win_fence(MPI_MODE_NOPUT ^ MPI_MODE_NOSTORE, win_datapoints); // wait for all comms to happen #pragma omp parallel for schedule(dynamic) for(idx_t p = 0; p < n; ++p) { datapoint_info_t i_point = *(dp_info_ptrs[p]); if(j < i_point.kstar + 1) { datapoint_info_t j_point = tmp_datapoints[p]; idx_t jidx = j_point.array_idx; if(j_point.is_center && i_point.g > j_point.g) { // set the lock at position i // actually is the p-th point int owner = foreign_owner(ctx, jidx); //if local process it if(owner == ctx -> mpi_rank) { //acquire the lock idx_t jpos = jidx - ctx -> idx_start; omp_set_lock(lock_array + jpos); if(i_point.g > to_remove_mask[jpos].value) { to_remove_mask[jpos].array_idx = i_point.array_idx; to_remove_mask[jpos].value = i_point.g; } omp_unset_lock(lock_array + jpos); } ////otherwise enqueue for sending else { center_removal_t element = {.rank = owner, .source_id = i_point.array_idx, .target_id = j_point.array_idx, .source_density = i_point.g}; enqueue_center_removal(removal_queues + p, element); } } } } //MPI_Allreduce(&i_have_finished, &all_have_finished, 1, MPI_INT, MPI_LAND, ctx -> mpi_communicator); //if(all_have_finished) break; if(i_have_finished) break; } //MPI_Win_unlock_all(win_datapoints); MPI_Win_fence(0, win_datapoints); free(tmp_datapoints); #else MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); idx_t n_foreign_req = 0; idx_t n_local_req = 0; float_t elapsed_proc = 0.;; #if !defined(THREAD_FUNNELED) #pragma omp parallel for schedule(dynamic) reduction(+:elapsed_proc, n_local_req, n_foreign_req) #endif for(idx_t p = 0; p < n; ++p) { Loading @@ -791,19 +899,31 @@ clusters_t Heuristic1(global_context_t *ctx) for(idx_t j = 1; j < i_point.kstar + 1; ++j) { idx_t jidx = i_point.ngbh[j].array_idx; struct timespec __start, __end; clock_gettime(CLOCK_MONOTONIC,&__start); datapoint_info_t j_point = find_possibly_halo_datapoint_rma(ctx, jidx, win_datapoints); clock_gettime(CLOCK_MONOTONIC,&__end); elapsed_proc += (double)(__end.tv_sec - __start.tv_sec) + (__end.tv_nsec - __start.tv_nsec)/1e9; int owner = foreign_owner(ctx, jidx); if(owner == ctx -> mpi_rank) { n_local_req++; } else { n_foreign_req++; } if(j_point.is_center && i_point.g > j_point.g) { // set the lock at position i // actually is the p-th point int owner = foreign_owner(ctx, jidx); // if local process it idx_t jpos = jidx - ctx -> idx_start; if(owner == ctx -> mpi_rank) { // acquire the lock idx_t jpos = jidx - ctx -> idx_start; omp_set_lock(lock_array + jpos); if(i_point.g > to_remove_mask[jpos].value) { Loading @@ -822,9 +942,14 @@ clusters_t Heuristic1(global_context_t *ctx) } } } MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); MPI_Barrier(ctx -> mpi_communicator); DB_PRINT("Rank %d: foreign requests points %lu out of %lu -> fraction %.2lf time %.2lfs\n", ctx -> mpi_rank, n_foreign_req, n_local_req + n_foreign_req, (float)n_foreign_req/(float)n_local_req, elapsed_proc); MPI_Barrier(ctx -> mpi_communicator); #endif //assemble arrays into a single buffer elapsed_time = TIME_STOP; Loading src/common/common.c +2 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,8 @@ void get_context(global_context_t* ctx) ctx -> __local_heap_buffers = NULL; ctx -> input_data_in_float32 = -1; ctx -> dims = 0; ctx -> k = 300; ctx -> z = 3; } void get_dataset_diagnostics(global_context_t* ctx, float_t* data) Loading src/common/common.h +1 −0 Original line number Diff line number Diff line Loading @@ -149,6 +149,7 @@ typedef struct global_context_t int mpi_rank; //rank of the processor uint32_t dims; //number of dimensions of the dataset idx_t k; //number of neighbors float_t z; //z parameter float_t* local_data; //pointer to the dataset stored into the node float_t* lb_box; //bounding box of the dataset float_t* ub_box; //bounding box of the dataset Loading src/main/main.c +44 −7 Original line number Diff line number Diff line Loading @@ -44,6 +44,8 @@ struct option long_options[] = {"in-dims" , required_argument, 0, 'd'}, {"out-data", optional_argument, 0, 'o'}, {"out-assignment", optional_argument, 0, 'a'}, {"kngbh", optional_argument, 0, 'k'}, {"z", optional_argument, 0, 'z'}, {"help", optional_argument, 0, 'h'}, {0, 0, 0, 0} }; Loading @@ -59,7 +61,9 @@ const char* help = "Distributed Advanced Density Peak\n"\ " mpi tasks and datapoints are ordered default is `out_data` \n"\ " -a --out-assignment (optional) output path for the cluster assignment output ranges [0 ... Nc - 1]\n"\ " for core points halo points have indices [-Nc ... -1] conversion\n"\ " of idx for an halo point is cluster_idx = -halo_idx - 1, default is `out_assignment`\n"; " of idx for an halo point is cluster_idx = -halo_idx - 1, default is `out_assignment`\n"\ " -k --kngbh (optional) number of nearest neighbors to compute\n"\ " -z --z (optional) number of nearest neighbors to compute\n"; void parse_args(global_context_t* ctx, int argc, char** argv) { Loading @@ -70,7 +74,7 @@ void parse_args(global_context_t* ctx, int argc, char** argv) snprintf(ctx -> output_assignment_file, DEFAULT_STR_LEN, "%s", OUT_CLUSTER_ASSIGN); snprintf(ctx -> output_data_file, DEFAULT_STR_LEN, "%s", OUT_DATA); while((opt = getopt_long(argc, argv, "i:t:d:o:a:h", long_options, NULL)) != -1) while((opt = getopt_long(argc, argv, "i:t:d:o:a:k:z:h", long_options, NULL)) != -1) { switch(opt) { Loading Loading @@ -103,10 +107,17 @@ void parse_args(global_context_t* ctx, int argc, char** argv) case 'a': strncpy(ctx -> output_assignment_file, optarg, DEFAULT_STR_LEN); break; case 'k': ctx -> k = atoi(optarg); break; case 'z': ctx -> z = atof(optarg); break; case 'h': mpi_printf(ctx, "%s\n", help); MPI_Finalize(); exit(0); default: mpi_printf(ctx, "%s\n", help); MPI_Finalize(); Loading @@ -121,6 +132,30 @@ void parse_args(global_context_t* ctx, int argc, char** argv) if(err){MPI_Finalize(); exit(1);}; } void print_hello(global_context_t* ctx) { char * hello = "" " _ _ \n" " __| | __ _ __| |_ __ \n" " / _` |/ _` |/ _` | '_ \\ \n" "| (_| | (_| | (_| | |_) | \n" " \\__,_|\\__,_|\\__,_| .__/ \n" " |_| \n" " Distributed Advanced Density Peak"; mpi_printf(ctx, "%s\n\n", hello); mpi_printf(ctx, "Configs: \n"); mpi_printf(ctx, "Data file -> %s\n", ctx -> input_data_file); mpi_printf(ctx, "Input Type -> float%d\n", ctx -> input_data_in_float32 ? 32 : 64); mpi_printf(ctx, "Dimensions -> %d\n", ctx -> dims); mpi_printf(ctx, "Output data file -> %s\n", ctx -> output_data_file); mpi_printf(ctx, "Output assignment file -> %s\n", ctx -> output_assignment_file); mpi_printf(ctx, "k -> %lu\n", ctx -> k); mpi_printf(ctx, "Z -> %lf\n", ctx -> z); mpi_printf(ctx, "\nRUNNING!\n"); } int main(int argc, char** argv) { #if defined (_OPENMP) int mpi_provided_thread_level; Loading Loading @@ -177,15 +212,19 @@ int main(int argc, char** argv) { */ int d = 5; int k = 300; float_t* data; //parse command line parse_args(&ctx, argc, argv); print_hello(&ctx); /* * Generate a random matrix of lenght of some kind */ if(ctx.mpi_rank == 0) { simulate_master_read_and_scatter(5, 1000000, &ctx); Loading @@ -211,10 +250,8 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) TIME_DEF double elapsed_time; float_t z = 3; int halo = MY_FALSE; float_t tol = 0.002; int k = 300; if(I_AM_MASTER && ctx -> world_size <= 6) { Loading Loading @@ -356,7 +393,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) MPI_DB_PRINT("----- Performing ngbh search -----\n"); MPI_Barrier(ctx -> mpi_communicator); mpi_ngbh_search(ctx, dp_info, &tree, &local_tree, ctx -> local_data, k); mpi_ngbh_search(ctx, dp_info, &tree, &local_tree, ctx -> local_data, ctx -> k); MPI_Barrier(ctx -> mpi_communicator); elapsed_time = TIME_STOP; Loading @@ -376,7 +413,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) compute_density_kstarnn_rma_v2(ctx, id, MY_FALSE); compute_correction(ctx, z); compute_correction(ctx, ctx -> z); elapsed_time = TIME_STOP; LOG_WRITE("Density estimate", elapsed_time) Loading @@ -393,7 +430,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) TIME_START; Heuristic3(ctx, &clusters, z, halo); Heuristic3(ctx, &clusters, ctx -> z, halo); elapsed_time = TIME_STOP; LOG_WRITE("H3", elapsed_time) Loading Loading
Makefile +1 −1 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O3 -g -fopenmp CFLAGS=-O3 -march=native -flto -funroll-loops -fopenmp LDFLAGS=-lm all: main Loading
src/adp/adp.c +135 −10 Original line number Diff line number Diff line Loading @@ -733,7 +733,7 @@ clusters_t Heuristic1(global_context_t *ctx) } } /* /** * optimized version * * Generate a mask that keeps track of the point has been eliminating the Loading @@ -741,7 +741,7 @@ clusters_t Heuristic1(global_context_t *ctx) * ends, center, removed centers, and max_rho arrays are populated * * optimized v2 use a queue of center removal and then exchange them */ **/ heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node)); for(idx_t p = 0; p < n; ++p) Loading Loading @@ -779,10 +779,118 @@ clusters_t Heuristic1(global_context_t *ctx) TIME_START; //#define EXP_CENTER_PRUNING #if defined(EXP_CENTER_PRUNING) int all_have_finished = 0; int i_have_finished = 0; datapoint_info_t* tmp_datapoints = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t)); // use strategy similar to the one employed in density estimation // vertical loop, first on k the on n // request points // then process the points MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); //MPI_Win_lock_all(0, win_datapoints); MPI_DB_PRINT("Using vertical loop on centers pruning\n"); int kmax = ctx -> k; for(idx_t j = 1; j < kmax; ++j) { i_have_finished = 1; #if !defined(THREAD_FUNNELED) // request points #pragma omp parallel for schedule(dynamic) for(idx_t p = 0; p < n; ++p) { datapoint_info_t i_point = *(dp_info_ptrs[p]); if(j < i_point.kstar + 1) { #pragma omp atomic write i_have_finished = 0; idx_t jidx = i_point.ngbh[j].array_idx; int owner = foreign_owner(ctx, jidx); /* find if datapoint is halo or not */ idx_t jpos = jidx - ctx -> rank_idx_start[owner]; if(owner == ctx -> mpi_rank) { tmp_datapoints[p] = ctx -> local_datapoints[jpos]; } else { MPI_Get(tmp_datapoints + p, sizeof(datapoint_info_t), MPI_BYTE, owner, jpos * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints); } } } //MPI_Win_flush_all(win_datapoints); MPI_Win_fence(MPI_MODE_NOPUT ^ MPI_MODE_NOSTORE, win_datapoints); // wait for all comms to happen #pragma omp parallel for schedule(dynamic) for(idx_t p = 0; p < n; ++p) { datapoint_info_t i_point = *(dp_info_ptrs[p]); if(j < i_point.kstar + 1) { datapoint_info_t j_point = tmp_datapoints[p]; idx_t jidx = j_point.array_idx; if(j_point.is_center && i_point.g > j_point.g) { // set the lock at position i // actually is the p-th point int owner = foreign_owner(ctx, jidx); //if local process it if(owner == ctx -> mpi_rank) { //acquire the lock idx_t jpos = jidx - ctx -> idx_start; omp_set_lock(lock_array + jpos); if(i_point.g > to_remove_mask[jpos].value) { to_remove_mask[jpos].array_idx = i_point.array_idx; to_remove_mask[jpos].value = i_point.g; } omp_unset_lock(lock_array + jpos); } ////otherwise enqueue for sending else { center_removal_t element = {.rank = owner, .source_id = i_point.array_idx, .target_id = j_point.array_idx, .source_density = i_point.g}; enqueue_center_removal(removal_queues + p, element); } } } } //MPI_Allreduce(&i_have_finished, &all_have_finished, 1, MPI_INT, MPI_LAND, ctx -> mpi_communicator); //if(all_have_finished) break; if(i_have_finished) break; } //MPI_Win_unlock_all(win_datapoints); MPI_Win_fence(0, win_datapoints); free(tmp_datapoints); #else MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); idx_t n_foreign_req = 0; idx_t n_local_req = 0; float_t elapsed_proc = 0.;; #if !defined(THREAD_FUNNELED) #pragma omp parallel for schedule(dynamic) reduction(+:elapsed_proc, n_local_req, n_foreign_req) #endif for(idx_t p = 0; p < n; ++p) { Loading @@ -791,19 +899,31 @@ clusters_t Heuristic1(global_context_t *ctx) for(idx_t j = 1; j < i_point.kstar + 1; ++j) { idx_t jidx = i_point.ngbh[j].array_idx; struct timespec __start, __end; clock_gettime(CLOCK_MONOTONIC,&__start); datapoint_info_t j_point = find_possibly_halo_datapoint_rma(ctx, jidx, win_datapoints); clock_gettime(CLOCK_MONOTONIC,&__end); elapsed_proc += (double)(__end.tv_sec - __start.tv_sec) + (__end.tv_nsec - __start.tv_nsec)/1e9; int owner = foreign_owner(ctx, jidx); if(owner == ctx -> mpi_rank) { n_local_req++; } else { n_foreign_req++; } if(j_point.is_center && i_point.g > j_point.g) { // set the lock at position i // actually is the p-th point int owner = foreign_owner(ctx, jidx); // if local process it idx_t jpos = jidx - ctx -> idx_start; if(owner == ctx -> mpi_rank) { // acquire the lock idx_t jpos = jidx - ctx -> idx_start; omp_set_lock(lock_array + jpos); if(i_point.g > to_remove_mask[jpos].value) { Loading @@ -822,9 +942,14 @@ clusters_t Heuristic1(global_context_t *ctx) } } } MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints); MPI_Barrier(ctx -> mpi_communicator); DB_PRINT("Rank %d: foreign requests points %lu out of %lu -> fraction %.2lf time %.2lfs\n", ctx -> mpi_rank, n_foreign_req, n_local_req + n_foreign_req, (float)n_foreign_req/(float)n_local_req, elapsed_proc); MPI_Barrier(ctx -> mpi_communicator); #endif //assemble arrays into a single buffer elapsed_time = TIME_STOP; Loading
src/common/common.c +2 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,8 @@ void get_context(global_context_t* ctx) ctx -> __local_heap_buffers = NULL; ctx -> input_data_in_float32 = -1; ctx -> dims = 0; ctx -> k = 300; ctx -> z = 3; } void get_dataset_diagnostics(global_context_t* ctx, float_t* data) Loading
src/common/common.h +1 −0 Original line number Diff line number Diff line Loading @@ -149,6 +149,7 @@ typedef struct global_context_t int mpi_rank; //rank of the processor uint32_t dims; //number of dimensions of the dataset idx_t k; //number of neighbors float_t z; //z parameter float_t* local_data; //pointer to the dataset stored into the node float_t* lb_box; //bounding box of the dataset float_t* ub_box; //bounding box of the dataset Loading
src/main/main.c +44 −7 Original line number Diff line number Diff line Loading @@ -44,6 +44,8 @@ struct option long_options[] = {"in-dims" , required_argument, 0, 'd'}, {"out-data", optional_argument, 0, 'o'}, {"out-assignment", optional_argument, 0, 'a'}, {"kngbh", optional_argument, 0, 'k'}, {"z", optional_argument, 0, 'z'}, {"help", optional_argument, 0, 'h'}, {0, 0, 0, 0} }; Loading @@ -59,7 +61,9 @@ const char* help = "Distributed Advanced Density Peak\n"\ " mpi tasks and datapoints are ordered default is `out_data` \n"\ " -a --out-assignment (optional) output path for the cluster assignment output ranges [0 ... Nc - 1]\n"\ " for core points halo points have indices [-Nc ... -1] conversion\n"\ " of idx for an halo point is cluster_idx = -halo_idx - 1, default is `out_assignment`\n"; " of idx for an halo point is cluster_idx = -halo_idx - 1, default is `out_assignment`\n"\ " -k --kngbh (optional) number of nearest neighbors to compute\n"\ " -z --z (optional) number of nearest neighbors to compute\n"; void parse_args(global_context_t* ctx, int argc, char** argv) { Loading @@ -70,7 +74,7 @@ void parse_args(global_context_t* ctx, int argc, char** argv) snprintf(ctx -> output_assignment_file, DEFAULT_STR_LEN, "%s", OUT_CLUSTER_ASSIGN); snprintf(ctx -> output_data_file, DEFAULT_STR_LEN, "%s", OUT_DATA); while((opt = getopt_long(argc, argv, "i:t:d:o:a:h", long_options, NULL)) != -1) while((opt = getopt_long(argc, argv, "i:t:d:o:a:k:z:h", long_options, NULL)) != -1) { switch(opt) { Loading Loading @@ -103,10 +107,17 @@ void parse_args(global_context_t* ctx, int argc, char** argv) case 'a': strncpy(ctx -> output_assignment_file, optarg, DEFAULT_STR_LEN); break; case 'k': ctx -> k = atoi(optarg); break; case 'z': ctx -> z = atof(optarg); break; case 'h': mpi_printf(ctx, "%s\n", help); MPI_Finalize(); exit(0); default: mpi_printf(ctx, "%s\n", help); MPI_Finalize(); Loading @@ -121,6 +132,30 @@ void parse_args(global_context_t* ctx, int argc, char** argv) if(err){MPI_Finalize(); exit(1);}; } void print_hello(global_context_t* ctx) { char * hello = "" " _ _ \n" " __| | __ _ __| |_ __ \n" " / _` |/ _` |/ _` | '_ \\ \n" "| (_| | (_| | (_| | |_) | \n" " \\__,_|\\__,_|\\__,_| .__/ \n" " |_| \n" " Distributed Advanced Density Peak"; mpi_printf(ctx, "%s\n\n", hello); mpi_printf(ctx, "Configs: \n"); mpi_printf(ctx, "Data file -> %s\n", ctx -> input_data_file); mpi_printf(ctx, "Input Type -> float%d\n", ctx -> input_data_in_float32 ? 32 : 64); mpi_printf(ctx, "Dimensions -> %d\n", ctx -> dims); mpi_printf(ctx, "Output data file -> %s\n", ctx -> output_data_file); mpi_printf(ctx, "Output assignment file -> %s\n", ctx -> output_assignment_file); mpi_printf(ctx, "k -> %lu\n", ctx -> k); mpi_printf(ctx, "Z -> %lf\n", ctx -> z); mpi_printf(ctx, "\nRUNNING!\n"); } int main(int argc, char** argv) { #if defined (_OPENMP) int mpi_provided_thread_level; Loading Loading @@ -177,15 +212,19 @@ int main(int argc, char** argv) { */ int d = 5; int k = 300; float_t* data; //parse command line parse_args(&ctx, argc, argv); print_hello(&ctx); /* * Generate a random matrix of lenght of some kind */ if(ctx.mpi_rank == 0) { simulate_master_read_and_scatter(5, 1000000, &ctx); Loading @@ -211,10 +250,8 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) TIME_DEF double elapsed_time; float_t z = 3; int halo = MY_FALSE; float_t tol = 0.002; int k = 300; if(I_AM_MASTER && ctx -> world_size <= 6) { Loading Loading @@ -356,7 +393,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) MPI_DB_PRINT("----- Performing ngbh search -----\n"); MPI_Barrier(ctx -> mpi_communicator); mpi_ngbh_search(ctx, dp_info, &tree, &local_tree, ctx -> local_data, k); mpi_ngbh_search(ctx, dp_info, &tree, &local_tree, ctx -> local_data, ctx -> k); MPI_Barrier(ctx -> mpi_communicator); elapsed_time = TIME_STOP; Loading @@ -376,7 +413,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) compute_density_kstarnn_rma_v2(ctx, id, MY_FALSE); compute_correction(ctx, z); compute_correction(ctx, ctx -> z); elapsed_time = TIME_STOP; LOG_WRITE("Density estimate", elapsed_time) Loading @@ -393,7 +430,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) TIME_START; Heuristic3(ctx, &clusters, z, halo); Heuristic3(ctx, &clusters, ctx -> z, halo); elapsed_time = TIME_STOP; LOG_WRITE("H3", elapsed_time) Loading