Loading src/tree/tree.c +56 −23 Original line number Diff line number Diff line Loading @@ -958,7 +958,7 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree MPI_DB_PRINT("Root is %p\n", tree -> root); if(I_AM_MASTER) { tree_print(ctx, tree -> root); //tree_print(ctx, tree -> root); write_nodes_to_file(ctx, tree, "bb/nodes_50_blobs_more_var.csv"); } Loading Loading @@ -1031,8 +1031,9 @@ int partition_data_around_key(int* key, float_t *val, int vec_len, int ref_key , void exchange_points(global_context_t* ctx, top_kdtree_t* tree) { size_t* points_per_proc = (size_t*)malloc(ctx -> world_size * sizeof(size_t)); int* points_per_proc = (int*)malloc(ctx -> world_size * sizeof(int)); int* points_owners = (int*)malloc(ctx -> dims * ctx -> local_n_points * sizeof(float_t)); int* partition_offset = (int*)malloc(ctx -> world_size * sizeof(int)); /* compute owner */ for(size_t i = 0; i < ctx -> local_n_points; ++i) { Loading @@ -1044,9 +1045,12 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) int last_idx = 0; int len = ctx -> local_n_points; float_t* curr_data = ctx -> local_data; partition_offset[0] = 0; for(int owner = 1; owner < ctx -> world_size; ++owner) { last_idx = partition_data_around_key(points_owners, ctx -> local_data, ctx -> dims, owner, last_idx, ctx -> local_n_points); partition_offset[owner] = last_idx; points_per_proc[owner - 1] = last_idx; } Loading @@ -1060,39 +1064,68 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) /* MPI_DB_PRINT("Points per proc begin: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); } MPI_DB_PRINT("\n"); */ MPI_Allreduce(MPI_IN_PLACE, points_per_proc, ctx -> world_size, MPI_UINT64_T,MPI_SUM, ctx -> mpi_communicator); int* points_per_proc_all = (int*)malloc(ctx -> world_size * sizeof(int)); MPI_Allreduce(MPI_IN_PLACE, points_per_proc_all, ctx -> world_size, MPI_INT,MPI_SUM, ctx -> mpi_communicator); size_t test_num = 0; for(int i = 0; i < ctx -> world_size; ++i) test_num += points_per_proc[i]; for(int i = 0; i < ctx -> world_size; ++i) test_num += points_per_proc_all[i]; MPI_DB_PRINT("Master has n_points %lu and in node population has %lu points\n", ctx -> n_points, test_num); /* MPI_DB_PRINT("Points per proc after: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); MPI_DB_PRINT("%lu ", points_per_proc_all[i]); } MPI_DB_PRINT("\n"); free(points_per_proc_all); */ int* rcvcount = (int*)malloc(ctx -> world_size * sizeof(int)); int* displs = (int*)malloc(ctx -> world_size * sizeof(int)); float_t* rcvbuffer = NULL; int tot_count = 0; for(int rcv = 0; rcv < ctx -> world_size; ++rcv) { /* recieve the number of points to recieve from each proc */ MPI_Gather(&(points_per_proc[rcv]), 1, MPI_INT, rcvcount, 1, MPI_INT, rcv, ctx -> mpi_communicator); float_t* send_buffer = ctx -> local_data + (ctx -> dims * partition_offset[rcv]); /* for(int i = 0; i < ctx -> local_n_points; ++i) /* if I am the reciever recieve */ if(rcv == ctx -> mpi_rank) { MPI_DB_PRINT("%d ", points_owners[i]); if(i % 10 == 0) MPI_DB_PRINT("\n"); displs[0] = 0; for(int i = 1; i < ctx -> world_size; ++i) displs[i] = displs[i - 1] + rcvcount[i - 1]; /*multiply for number of elements */ for(int i = 0; i < ctx -> world_size; ++i) { displs[i] = displs[i] * ctx -> dims; rcvcount[i] = rcvcount[i] * ctx -> dims; tot_count += rcvcount[i]; } */ //DB_PRINT("[RANK %d] is recieving %d elements %d points\n", rcv, tot_count, tot_count / ctx -> dims); rcvbuffer = (float_t*)malloc(tot_count * sizeof(float_t)); } MPI_Gatherv(send_buffer, points_per_proc[rcv], MPI_MY_FLOAT, rcvbuffer, rcvcount, displs, MPI_MY_FLOAT, rcv, ctx -> mpi_communicator); } ctx -> local_n_points = tot_count; /* free prv pointer */ free(ctx -> local_data); ctx -> local_data = rcvbuffer; free(points_owners); free(points_per_proc); free(partition_offset); free(rcvcount); free(displs); } void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) Loading Loading @@ -1184,7 +1217,6 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) original_ps.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); original_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); float_t incr = 0.05; float_t tol = 0.002; top_kdtree_t tree; Loading @@ -1196,6 +1228,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) free(send_counts); free(displacements); Loading Loading
src/tree/tree.c +56 −23 Original line number Diff line number Diff line Loading @@ -958,7 +958,7 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree MPI_DB_PRINT("Root is %p\n", tree -> root); if(I_AM_MASTER) { tree_print(ctx, tree -> root); //tree_print(ctx, tree -> root); write_nodes_to_file(ctx, tree, "bb/nodes_50_blobs_more_var.csv"); } Loading Loading @@ -1031,8 +1031,9 @@ int partition_data_around_key(int* key, float_t *val, int vec_len, int ref_key , void exchange_points(global_context_t* ctx, top_kdtree_t* tree) { size_t* points_per_proc = (size_t*)malloc(ctx -> world_size * sizeof(size_t)); int* points_per_proc = (int*)malloc(ctx -> world_size * sizeof(int)); int* points_owners = (int*)malloc(ctx -> dims * ctx -> local_n_points * sizeof(float_t)); int* partition_offset = (int*)malloc(ctx -> world_size * sizeof(int)); /* compute owner */ for(size_t i = 0; i < ctx -> local_n_points; ++i) { Loading @@ -1044,9 +1045,12 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) int last_idx = 0; int len = ctx -> local_n_points; float_t* curr_data = ctx -> local_data; partition_offset[0] = 0; for(int owner = 1; owner < ctx -> world_size; ++owner) { last_idx = partition_data_around_key(points_owners, ctx -> local_data, ctx -> dims, owner, last_idx, ctx -> local_n_points); partition_offset[owner] = last_idx; points_per_proc[owner - 1] = last_idx; } Loading @@ -1060,39 +1064,68 @@ void exchange_points(global_context_t* ctx, top_kdtree_t* tree) /* MPI_DB_PRINT("Points per proc begin: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); } MPI_DB_PRINT("\n"); */ MPI_Allreduce(MPI_IN_PLACE, points_per_proc, ctx -> world_size, MPI_UINT64_T,MPI_SUM, ctx -> mpi_communicator); int* points_per_proc_all = (int*)malloc(ctx -> world_size * sizeof(int)); MPI_Allreduce(MPI_IN_PLACE, points_per_proc_all, ctx -> world_size, MPI_INT,MPI_SUM, ctx -> mpi_communicator); size_t test_num = 0; for(int i = 0; i < ctx -> world_size; ++i) test_num += points_per_proc[i]; for(int i = 0; i < ctx -> world_size; ++i) test_num += points_per_proc_all[i]; MPI_DB_PRINT("Master has n_points %lu and in node population has %lu points\n", ctx -> n_points, test_num); /* MPI_DB_PRINT("Points per proc after: "); for(int i = 0; i < ctx -> world_size; ++i) { MPI_DB_PRINT("%lu ", points_per_proc[i]); MPI_DB_PRINT("%lu ", points_per_proc_all[i]); } MPI_DB_PRINT("\n"); free(points_per_proc_all); */ int* rcvcount = (int*)malloc(ctx -> world_size * sizeof(int)); int* displs = (int*)malloc(ctx -> world_size * sizeof(int)); float_t* rcvbuffer = NULL; int tot_count = 0; for(int rcv = 0; rcv < ctx -> world_size; ++rcv) { /* recieve the number of points to recieve from each proc */ MPI_Gather(&(points_per_proc[rcv]), 1, MPI_INT, rcvcount, 1, MPI_INT, rcv, ctx -> mpi_communicator); float_t* send_buffer = ctx -> local_data + (ctx -> dims * partition_offset[rcv]); /* for(int i = 0; i < ctx -> local_n_points; ++i) /* if I am the reciever recieve */ if(rcv == ctx -> mpi_rank) { MPI_DB_PRINT("%d ", points_owners[i]); if(i % 10 == 0) MPI_DB_PRINT("\n"); displs[0] = 0; for(int i = 1; i < ctx -> world_size; ++i) displs[i] = displs[i - 1] + rcvcount[i - 1]; /*multiply for number of elements */ for(int i = 0; i < ctx -> world_size; ++i) { displs[i] = displs[i] * ctx -> dims; rcvcount[i] = rcvcount[i] * ctx -> dims; tot_count += rcvcount[i]; } */ //DB_PRINT("[RANK %d] is recieving %d elements %d points\n", rcv, tot_count, tot_count / ctx -> dims); rcvbuffer = (float_t*)malloc(tot_count * sizeof(float_t)); } MPI_Gatherv(send_buffer, points_per_proc[rcv], MPI_MY_FLOAT, rcvbuffer, rcvcount, displs, MPI_MY_FLOAT, rcv, ctx -> mpi_communicator); } ctx -> local_n_points = tot_count; /* free prv pointer */ free(ctx -> local_data); ctx -> local_data = rcvbuffer; free(points_owners); free(points_per_proc); free(partition_offset); free(rcvcount); free(displs); } void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) Loading Loading @@ -1184,7 +1217,6 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) original_ps.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); original_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t)); float_t incr = 0.05; float_t tol = 0.002; top_kdtree_t tree; Loading @@ -1196,6 +1228,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) free(send_counts); free(displacements); Loading