Loading Makefile +1 −1 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O3 -march=native CFLAGS=-O0 -g LDFLAGS=-lm all: main Loading src/tree/tree.c +136 −28 Original line number Diff line number Diff line Loading @@ -488,7 +488,9 @@ void compute_pure_global_binning(global_context_t *ctx, pointset_t *ps, for (size_t i = 0; i < ps->n_points; ++i) { float_t p = ps->data[i * ps->dims + d]; int bin_idx = (int)((p - ps->lb_box[d]) / bin_w); /* to prevent the border point in the box to have bin_idx == k_global causing invalid memory access */ int bin_idx = MIN((int)((p - ps->lb_box[d]) / bin_w), k_global - 1); //int bin_idx = (int)((p - ps->lb_box[d]) / bin_w), k_global - 1; /* if(bin_idx < 0) { Loading Loading @@ -716,6 +718,7 @@ top_kdtree_node_t* top_tree_generate_node(global_context_t* ctx, top_kdtree_t* t ptr -> lb_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); ptr -> ub_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); ptr -> owner = -1; ptr -> split_dim = 0.; ++tree -> count; return ptr; Loading Loading @@ -966,6 +969,132 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree } int compute_point_owner(global_context_t* ctx, top_kdtree_t* tree, float_t* data) { top_kdtree_node_t* current_node = tree -> root; int owner = current_node -> owner; while(owner == -1) { /* compute side */ int split_dim = current_node -> split_dim; int side = data[split_dim] > current_node -> split_val; switch (side) { case TOP_TREE_RCH: { current_node = current_node -> rch; } break; case TOP_TREE_LCH: { current_node = current_node -> lch; } break; default: break; } owner = current_node -> owner; } return owner; } /* to partition points around owners */ int partition_data_around_key(int* key, float_t *val, int vec_len, int ref_key , int left, int right) { /* * returns the number of elements less than the pivot */ int store_index = left; int i; /* Move pivot to end */ for (i = left; i < right; ++i) { // if(compare_data_element(array + i*vec_len, array + pivot_index*vec_len, compare_dim ) >= 0){ if (key[i] < ref_key) { swap_data_element(val + store_index * vec_len, val + i * vec_len, vec_len); /* swap keys */ int tmp = key[i]; key[i] = key[store_index]; key[store_index] = tmp; store_index += 1; } } /* Move pivot to its final place */ // swap_data_element(array + (store_index)*vec_len , array + right*vec_len, // vec_len); return store_index; // maybe, update, it works :) } void exchange_points(global_context_t* ctx, top_kdtree_t* tree) { size_t* points_per_proc = (size_t*)malloc(ctx -> world_size * sizeof(size_t)); int* points_owners = (int*) malloc(ctx -> dims * ctx -> local_n_points * sizeof(float_t)); /* compute owner */ for(size_t i = 0; i < ctx -> local_n_points; ++i) { /* tree walk */ points_owners[i] = compute_point_owner(ctx, tree, ctx -> local_data + (i * ctx -> dims)); } int last_idx = 0; int len = ctx -> local_n_points; float_t* curr_data = ctx -> local_data; for(int owner = 1; owner < ctx -> world_size; ++owner) { last_idx = partition_data_around_key(points_owners, ctx -> local_data, ctx -> dims, owner, last_idx, ctx -> local_n_points); points_per_proc[owner - 1] = last_idx; } points_per_proc[ctx -> world_size - 1] = ctx -> local_n_points; for(int i = ctx -> world_size - 1; i > 0; --i) { points_per_proc[i] = points_per_proc[i] - points_per_proc[i - 1]; } /* MPI_DB_PRINT("Points per proc begin: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); } MPI_DB_PRINT("\n"); */ MPI_Allreduce(MPI_IN_PLACE, points_per_proc, ctx -> world_size, MPI_UINT64_T,MPI_SUM, ctx -> mpi_communicator); size_t test_num = 0; for(int i = 0; i < ctx -> world_size; ++i) test_num += points_per_proc[i]; MPI_DB_PRINT("Master has n_points %lu and in node population has %lu points\n", ctx -> n_points, test_num); /* MPI_DB_PRINT("Points per proc after: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); } MPI_DB_PRINT("\n"); */ /* for(int i = 0; i < ctx -> local_n_points; ++i) { MPI_DB_PRINT("%d ", points_owners[i]); if(i % 10 == 0) MPI_DB_PRINT("\n"); } */ free(points_owners); free(points_per_proc); } void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) { float_t *data; Loading Loading @@ -997,6 +1126,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) // read_data_file(ctx,"../norm_data/std_g0163178_Me14_091_0000",MY_TRUE); // ctx -> n_points = 48*5*2000; ctx->n_points = ctx->n_points / ctx->dims; //ctx -> n_points = 48 * 500; mpi_printf(ctx, "Read %lu points in %u dims\n", ctx->n_points, ctx->dims); } Loading Loading @@ -1055,33 +1185,12 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) original_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); float_t incr = 0.05; float_t tol = 0.001; /* for (int d = 0; d < ctx->dims; ++d) { for (float_t f = incr; f < 1; f += incr) { int best_bin_idx; float_t ep; compute_bounding_box_pointset(ctx, &original_ps); compute_pure_global_binning(ctx, &original_ps, global_bin_counts_int, k_global, d); guess_t g = retrieve_guess_pure( ctx, &original_ps, global_bin_counts_int, k_global, d, f); // check_pc_pointset(ctx, &ps, best_guess, d, f); g.ep = check_pc_pointset_parallel(ctx, &original_ps, g, d, f); g = refine_pure_binning(ctx, &original_ps, g, global_bin_counts_int, k_global, d, f, tol); MPI_DB_PRINT("[MASTER] dimension %d searching for %lf found %lf\n", d, f, g.ep); } MPI_DB_PRINT("--------------------------------------\n\n"); } */ float_t tol = 0.002; top_kdtree_t tree; top_tree_init(ctx, &tree); build_top_kdtree(ctx, &original_ps, &tree, k_global, tol); exchange_points(ctx, &tree); top_tree_free(ctx, &tree); Loading @@ -1089,11 +1198,10 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) free(send_counts); free(displacements); // free(pvt_data); if (ctx->mpi_rank == 0) free(data); if (ctx->mpi_rank == 0) free(data); original_ps.data = NULL; free_pointset(&original_ps); // free(pvt_data); free(global_bin_counts_int); } Loading
Makefile +1 −1 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O3 -march=native CFLAGS=-O0 -g LDFLAGS=-lm all: main Loading
src/tree/tree.c +136 −28 Original line number Diff line number Diff line Loading @@ -488,7 +488,9 @@ void compute_pure_global_binning(global_context_t *ctx, pointset_t *ps, for (size_t i = 0; i < ps->n_points; ++i) { float_t p = ps->data[i * ps->dims + d]; int bin_idx = (int)((p - ps->lb_box[d]) / bin_w); /* to prevent the border point in the box to have bin_idx == k_global causing invalid memory access */ int bin_idx = MIN((int)((p - ps->lb_box[d]) / bin_w), k_global - 1); //int bin_idx = (int)((p - ps->lb_box[d]) / bin_w), k_global - 1; /* if(bin_idx < 0) { Loading Loading @@ -716,6 +718,7 @@ top_kdtree_node_t* top_tree_generate_node(global_context_t* ctx, top_kdtree_t* t ptr -> lb_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); ptr -> ub_node_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); ptr -> owner = -1; ptr -> split_dim = 0.; ++tree -> count; return ptr; Loading Loading @@ -966,6 +969,132 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree } int compute_point_owner(global_context_t* ctx, top_kdtree_t* tree, float_t* data) { top_kdtree_node_t* current_node = tree -> root; int owner = current_node -> owner; while(owner == -1) { /* compute side */ int split_dim = current_node -> split_dim; int side = data[split_dim] > current_node -> split_val; switch (side) { case TOP_TREE_RCH: { current_node = current_node -> rch; } break; case TOP_TREE_LCH: { current_node = current_node -> lch; } break; default: break; } owner = current_node -> owner; } return owner; } /* to partition points around owners */ int partition_data_around_key(int* key, float_t *val, int vec_len, int ref_key , int left, int right) { /* * returns the number of elements less than the pivot */ int store_index = left; int i; /* Move pivot to end */ for (i = left; i < right; ++i) { // if(compare_data_element(array + i*vec_len, array + pivot_index*vec_len, compare_dim ) >= 0){ if (key[i] < ref_key) { swap_data_element(val + store_index * vec_len, val + i * vec_len, vec_len); /* swap keys */ int tmp = key[i]; key[i] = key[store_index]; key[store_index] = tmp; store_index += 1; } } /* Move pivot to its final place */ // swap_data_element(array + (store_index)*vec_len , array + right*vec_len, // vec_len); return store_index; // maybe, update, it works :) } void exchange_points(global_context_t* ctx, top_kdtree_t* tree) { size_t* points_per_proc = (size_t*)malloc(ctx -> world_size * sizeof(size_t)); int* points_owners = (int*) malloc(ctx -> dims * ctx -> local_n_points * sizeof(float_t)); /* compute owner */ for(size_t i = 0; i < ctx -> local_n_points; ++i) { /* tree walk */ points_owners[i] = compute_point_owner(ctx, tree, ctx -> local_data + (i * ctx -> dims)); } int last_idx = 0; int len = ctx -> local_n_points; float_t* curr_data = ctx -> local_data; for(int owner = 1; owner < ctx -> world_size; ++owner) { last_idx = partition_data_around_key(points_owners, ctx -> local_data, ctx -> dims, owner, last_idx, ctx -> local_n_points); points_per_proc[owner - 1] = last_idx; } points_per_proc[ctx -> world_size - 1] = ctx -> local_n_points; for(int i = ctx -> world_size - 1; i > 0; --i) { points_per_proc[i] = points_per_proc[i] - points_per_proc[i - 1]; } /* MPI_DB_PRINT("Points per proc begin: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); } MPI_DB_PRINT("\n"); */ MPI_Allreduce(MPI_IN_PLACE, points_per_proc, ctx -> world_size, MPI_UINT64_T,MPI_SUM, ctx -> mpi_communicator); size_t test_num = 0; for(int i = 0; i < ctx -> world_size; ++i) test_num += points_per_proc[i]; MPI_DB_PRINT("Master has n_points %lu and in node population has %lu points\n", ctx -> n_points, test_num); /* MPI_DB_PRINT("Points per proc after: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); } MPI_DB_PRINT("\n"); */ /* for(int i = 0; i < ctx -> local_n_points; ++i) { MPI_DB_PRINT("%d ", points_owners[i]); if(i % 10 == 0) MPI_DB_PRINT("\n"); } */ free(points_owners); free(points_per_proc); } void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) { float_t *data; Loading Loading @@ -997,6 +1126,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) // read_data_file(ctx,"../norm_data/std_g0163178_Me14_091_0000",MY_TRUE); // ctx -> n_points = 48*5*2000; ctx->n_points = ctx->n_points / ctx->dims; //ctx -> n_points = 48 * 500; mpi_printf(ctx, "Read %lu points in %u dims\n", ctx->n_points, ctx->dims); } Loading Loading @@ -1055,33 +1185,12 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) original_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); float_t incr = 0.05; float_t tol = 0.001; /* for (int d = 0; d < ctx->dims; ++d) { for (float_t f = incr; f < 1; f += incr) { int best_bin_idx; float_t ep; compute_bounding_box_pointset(ctx, &original_ps); compute_pure_global_binning(ctx, &original_ps, global_bin_counts_int, k_global, d); guess_t g = retrieve_guess_pure( ctx, &original_ps, global_bin_counts_int, k_global, d, f); // check_pc_pointset(ctx, &ps, best_guess, d, f); g.ep = check_pc_pointset_parallel(ctx, &original_ps, g, d, f); g = refine_pure_binning(ctx, &original_ps, g, global_bin_counts_int, k_global, d, f, tol); MPI_DB_PRINT("[MASTER] dimension %d searching for %lf found %lf\n", d, f, g.ep); } MPI_DB_PRINT("--------------------------------------\n\n"); } */ float_t tol = 0.002; top_kdtree_t tree; top_tree_init(ctx, &tree); build_top_kdtree(ctx, &original_ps, &tree, k_global, tol); exchange_points(ctx, &tree); top_tree_free(ctx, &tree); Loading @@ -1089,11 +1198,10 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) free(send_counts); free(displacements); // free(pvt_data); if (ctx->mpi_rank == 0) free(data); if (ctx->mpi_rank == 0) free(data); original_ps.data = NULL; free_pointset(&original_ps); // free(pvt_data); free(global_bin_counts_int); }