diff --git a/src/adp/adp.c b/src/adp/adp.c
index 56f946852ec1efe91b6a973217a4306a47435f70..f28cbdeab78c390cdb52d2732adf83a2d60646cf 100644
--- a/src/adp/adp.c
+++ b/src/adp/adp.c
@@ -2,6 +2,7 @@
 #include "mpi.h"
 #include
 #include
+#include
 #include
 #include
 
@@ -1940,6 +1941,38 @@ void merge_A_into_B(idx_t* who_amI, idx_t cluster_A, idx_t cluster_B, idx_t n)
     return;
 }
 
+/**
+ * Return the representative (root) of the set containing `index`.
+ *
+ * `cluster` is a parent-pointer forest: a root is an entry that satisfies
+ * cluster[r] == r. While walking up, every visited entry is re-pointed to
+ * its grandparent (path halving) so that long chains produced by repeated
+ * unions get shorter on each lookup. Roots are never changed.
+ */
+idx_t find_root(idx_t* cluster, idx_t index)
+{
+    idx_t root = index;
+    while(root != cluster[root])
+    {
+        cluster[root] = cluster[cluster[root]];
+        root = cluster[root];
+    }
+    return root;
+}
+
+/**
+ * Merge the set containing `a` into the set containing `b`:
+ * the root of `a` is attached below the root of `b`, which stays the
+ * representative of the union.
+ */
+void union_A_into_B(idx_t* cluster, idx_t a, idx_t b)
+{
+    idx_t root_a = find_root(cluster, a);
+    idx_t root_b = find_root(cluster, b);
+
+    cluster[root_a] = root_b;
+}
+
 void master_finds_borders(global_context_t* ctx, clusters_t* cluster, float_t Z, idx_t* surviving_clusters, datapoint_info_t* centers_dp)
 {
     datapoint_info_t* dp_info = ctx -> local_datapoints;
@@ -2016,21 +2049,39 @@ void master_finds_borders(global_context_t* ctx, clusters_t* cluster, float_t Z,
     struct timespec start_merge, end_merge;
     #endif
 
+    struct timespec start_epoch, end_epoch;
+
+    idx_t slice = merge_count / 20;
+    idx_t actual_merges = 0;
+
+    clock_gettime(CLOCK_MONOTONIC, &start_epoch);
+
     for( idx_t m = 0; m < merge_count; m++ )
     {
         #if defined(WRITE_MERGES_INFO)
             clock_gettime(CLOCK_MONOTONIC, &start_merge);
         #endif
-        // print progress
-        if(merge_count > 1e5)
+        // print progress diagnostics
+        if(merge_count > 1e5 && (m % slice == 0 || m == merge_count - 1))
         {
-            int slice = merge_count / 20;
-            if(m % slice == 0 || m == merge_count - 1) printf("Merging progress: %lu / %lu -> %.2f \n",
-                    m, merge_count, (float)m/(float)merge_count * 100.);
+            clock_gettime(CLOCK_MONOTONIC, &end_epoch);
+
+            float elapsed_time = (float)(end_epoch.tv_sec - start_epoch.tv_sec) +
+                                 (float)(end_epoch.tv_nsec - start_epoch.tv_nsec)/1e9;
+            // m == 0 on the first report: avoid dividing by zero
+            float avg_per_merge = m > 0 ? elapsed_time/(float)m : 0.f;
+
+            printf("Merging progress: %lu / %lu -> %.2f .. elapsed time: %.2f .. eta: %.2f .. avg per merge %e .. frac merges %f\n",
+                    m, merge_count, (float)m/(float)merge_count * 100.,
+                    elapsed_time, avg_per_merge * (float)merge_count,
+                    avg_per_merge, (float)actual_merges/(float)slice);
+            actual_merges = 0;
         }
 
-        #define src surviving_clusters[merging_table[m].source]
-        #define trg surviving_clusters[merging_table[m].target]
+        // idx_t src = surviving_clusters[merging_table[m].source];
+        // idx_t trg = surviving_clusters[merging_table[m].target];
+        idx_t src = find_root(surviving_clusters, merging_table[m].source);
+        idx_t trg = find_root(surviving_clusters, merging_table[m].target);
 
         /*
          * Enforce a that in case of symmetric merging condition the lowest idx cluster
@@ -2058,6 +2109,8 @@ void master_finds_borders(global_context_t* ctx, clusters_t* cluster, float_t Z,
         float_t dens_border_err = b.error;
         int i_have_to_merge = is_a_merging(dens1,dens1_err,dens2,dens2_err,dens_border,dens_border_err,Z);
 
+        actual_merges += (i_have_to_merge && src != trg);
+
         switch (i_have_to_merge && src != trg)
         {
             case 1:
@@ -2079,7 +2132,8 @@ void master_finds_borders(global_context_t* ctx, clusters_t* cluster, float_t Z,
                      */
 
                     fix_sparse_borders_A_into_B(new_src, new_trg, cluster);
-                    merge_A_into_B(surviving_clusters, new_src, new_trg, nclus );
+                    union_A_into_B(surviving_clusters, new_src, new_trg);
+                    //merge_A_into_B(surviving_clusters, new_src, new_trg, nclus );
                 }
                 break;
 
@@ -2099,11 +2153,17 @@ void master_finds_borders(global_context_t* ctx, clusters_t* cluster, float_t Z,
             fflush(f);
         #endif
 
-
-        #undef src
-        #undef trg
     }
 
+    // flatten the forest: afterwards surviving_clusters[i] is the root of i.
+    // Kept serial on purpose: find_root rewrites entries of the array while
+    // walking, so concurrent calls on it would race.
+    for(idx_t i = 0; i < nclus; ++i)
+    {
+        surviving_clusters[i] = find_root(surviving_clusters, i);
+    }
+
+
     #if defined(WRITE_MERGES_INFO)
         fclose(f);
     #endif
@@ -2214,6 +2274,22 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)
 
     clock_gettime(CLOCK_MONOTONIC, &start_tot);
 
+    // if(I_AM_MASTER)
+    // {
+    //     // reallocate to keep memory nearby
+    //     for(idx_t i = 0; i < cluster -> centers.count; ++i)
+    //     {
+    //         idx_t n_borders = cluster->sparse_borders[i].count;
+    //         sparse_border_t* tmp_adj_list = (sparse_border_t*)MY_MALLOC(n_borders * sizeof(sparse_border_t));
+    //         memcpy(tmp_adj_list, cluster->sparse_borders[i].data, n_borders * sizeof(sparse_border_t));
+    //         free(cluster->sparse_borders[i].data);
+    //         cluster->sparse_borders[i].data = tmp_adj_list;
+    //
+    //         printf("%lu\n", i);
+    //         fflush(stdout);
+    //     }
+    // }
+
     datapoint_info_t* dp_info = ctx -> local_datapoints;
     idx_t nclus = cluster -> centers.count;
 
diff --git a/src/main/main.c b/src/main/main.c
index 0863c9917120ca8984abd91ed1decd2803018798..2bc6aa26d0d6261790bbf12c75e7b6ca64d579b9 100644
--- a/src/main/main.c
+++ b/src/main/main.c
@@ -429,6 +429,9 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
 
     LOG_WRITE("H2", elapsed_time)
 
+    // free ngbh
+    MPI_Free_mem(ctx -> __local_heap_buffers);
+    ctx -> __local_heap_buffers = NULL;
     TIME_START;
     Heuristic3(ctx, &clusters, ctx -> z, halo);
     elapsed_time = TIME_STOP;
diff --git a/src/tree/tree.c b/src/tree/tree.c
index 3014c3e369a48ba1101b047af748f1673fab6dab..d3e0adabb58e36ad912d540e6c86ae24b44b2abf 100644
--- a/src/tree/tree.c
+++ b/src/tree/tree.c
@@ -29,7 +29,8 @@
 //#define MAX_MSG_SIZE 4294967296
 
 /* Used slices of 10 mb ? Really good? Maybe at the cause of TID thing */
-#define MAX_MSG_SIZE (10000 * k * sizeof(heap_node))
+// #define MAX_MSG_SIZE (10000 * k * sizeof(heap_node))
+#define MAX_MSG_SIZE (100000 * k * sizeof(heap_node))
 
 #define TOP_TREE_RCH 1
 