Commit fe30144a authored by lykos98's avatar lykos98
Browse files

tree build complete, working on diagnostics

parent dedd2d26
Loading
Loading
Loading
Loading
+140 −55
Original line number Diff line number Diff line
@@ -378,8 +378,6 @@ float_t check_pc_pointset_parallel(global_context_t *ctx, pointset_t *ps, guess_
}

void compute_bounding_box_pointset(global_context_t *ctx, pointset_t *ps) {
	ps->lb_box = (float_t *)malloc(ps->dims * sizeof(float_t));
	ps->ub_box = (float_t *)malloc(ps->dims * sizeof(float_t));

	for (size_t d = 0; d < ps->dims; ++d)
	{
@@ -432,6 +430,8 @@ void compute_bounding_box_pointset(global_context_t *ctx, pointset_t *ps) {
	lb[d], ub[d]); MPI_DB_PRINT("\n");
	*/



	#undef local_data
	#undef lb
	#undef ub
@@ -466,6 +466,7 @@ void compute_adaptive_binning_pointset(global_context_t *ctx, pointset_t *ps,
		qsort(ps->data, ps->n_points, ps->dims * sizeof(float_t), compare_data_element_sort);
	}


	/*	
	 * Now what is more convenient? We have to also
	 * keep track of who owns the most "median thing"
@@ -781,7 +782,9 @@ int retrieve_guess_adaptive(global_context_t *ctx, pointset_t *ps,

	float_t x_guess = (pc - y0) / (y1 - y0) * (x1 - x0) + x0;

	/*
	MPI_DB_PRINT("[MASTER] best guess @ %lf is %lf on bin %d on dimension %d --- x0 %lf x1 %lf y0 %lf y1 %lf\n", pc, x_guess, idx, d, x0, x1, y0, y1);
	*/

	/* find nearest point btw guess */

@@ -862,11 +865,13 @@ guess_t retrieve_guess_pure(global_context_t *ctx, pointset_t *ps,

	float_t x_guess = (pc - y0) / (y1 - y0) * (x1 - x0) + x0;

		
	/*
	MPI_DB_PRINT("[MASTER] best guess @ %lf is %lf on bin %d on dimension %d --- x0 %lf x1 %lf y0 %lf y1 %lf\n",pc, x_guess,idx, d, x0, x1, y0, y1);
	*/
	


	guess_t g = {.bin_idx = idx, .x_guess = x_guess};
	return g;
}
@@ -979,22 +984,42 @@ void compute_pure_global_binning(global_context_t *ctx, pointset_t *ps,
{
	/* compute binning of data along dimension d */
	uint64_t *local_bin_count = (uint64_t *)malloc(k_global * sizeof(uint64_t));
	//MPI_DB_PRINT("%p %p %p %p %p\n", local_bin_count, global_bin_counts, ps -> data, ps -> lb_box, ps -> ub_box);
	//DB_PRINT("rank %d npoints %lu %p %p %p %p %p\n",ctx -> mpi_rank, ps -> n_points, local_bin_count, global_bin_counts, ps -> data, ps -> lb_box, ps -> ub_box);
	for (size_t k = 0; k < k_global; ++k) 
	{
		local_bin_count[k] = 0;
		global_bin_counts[k] = 0;
	}

	float_t bin_w = (ps->ub_box[d] - ps->lb_box[d]) / k_global;
	/*
	MPI_DB_PRINT("[PS BOUNDING BOX %d]: ", ctx -> mpi_rank);
	for(size_t d = 0; d < ps -> dims; ++d) MPI_DB_PRINT("d%d:[%lf, %lf] ",(int)d, ps -> lb_box[d], ps -> ub_box[d]); MPI_DB_PRINT("\n");
	MPI_DB_PRINT("\n");
	*/
	


	float_t bin_w = (ps-> ub_box[d] - ps->lb_box[d]) / (float_t)k_global;

	for (size_t i = 0; i < ps->n_points; ++i) 
	{
		float_t p = ps->data[i * ps->dims + d];
		int bin_idx = (int)((p - ps->lb_box[d]) / bin_w);
		/*
		if(bin_idx < 0) 
		{
			DB_PRINT("rank %d qua %lf %lf %d %lf\n",ctx -> mpi_rank, (p - ps->lb_box[d]), (p - ps->lb_box[d]) / bin_w, bin_idx, bin_w);
			DB_PRINT("[PS BOUNDING BOX %d i have %d]: ", ctx -> mpi_rank,d);
			for(size_t d = 0; d < ps -> dims; ++d) DB_PRINT("d%d:[%lf, %lf] ",(int)d, ps -> lb_box[d], ps -> ub_box[d]); MPI_DB_PRINT("\n");
			DB_PRINT("\n");
		}
		*/
		local_bin_count[bin_idx]++;
	}

	MPI_Allreduce(local_bin_count, global_bin_counts, k_global, MPI_UNSIGNED_LONG, MPI_SUM, ctx->mpi_communicator);
	free(local_bin_count);
}

int partition_data_around_value(float_t *array, int vec_len, int compare_dim,
@@ -1093,8 +1118,8 @@ guess_t refine_pure_binning(global_context_t *ctx, pointset_t *ps,
		tmp_ps.n_points = end_idx - start_idx;
		tmp_ps.data = ps->data + start_idx * ps->dims;
		tmp_ps.dims = ps->dims;
		tmp_ps.lb_box = NULL;
		tmp_ps.ub_box = NULL;
		tmp_ps.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));
		tmp_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));

		compute_bounding_box_pointset(ctx, &tmp_ps);

@@ -1146,8 +1171,6 @@ void free_queue(partition_queue_t *pq) { free(pq->data); }

void get_pointset_from_partition(pointset_t *ps, partition_t *part) 
{
	ps->lb_box = NULL;
	ps->ub_box = NULL;
	ps->n_points = part->n_points;
	ps->data 	 = part->base_ptr;
	ps->n_points = part->n_points;
@@ -1184,6 +1207,11 @@ void top_tree_init(global_context_t *ctx, top_kdtree_t *tree)

/*
 * Release all memory owned by a top-level kd-tree.
 *
 * Frees the bounding-box arrays (node_box_lb / node_box_ub) of the first
 * `count` nodes, then the node array itself. Afterwards the tree is left
 * in an empty state (no node array, zero nodes, no root), so an accidental
 * second call or a stale access through tree -> root cannot reach freed
 * memory. The tree must be re-initialised before it is used again.
 *
 * ctx  : not used here; kept so the signature matches the other
 *        top_tree_* functions.
 * tree : tree to release; a NULL pointer is accepted and ignored.
 */
void top_tree_free(global_context_t *ctx, top_kdtree_t *tree) 
{
	(void)ctx;
	if(!tree) return;

	if(tree -> _nodes)
	{
		for(int i = 0; i < tree -> count; ++i)
		{
			top_kdtree_node_t* node = tree -> _nodes + i;
			/* free(NULL) is a no-op, so the boxes need no NULL guard */
			free(node -> node_box_lb);
			free(node -> node_box_ub);
		}
	}

	free(tree -> _nodes);

	/* drop every reference into the storage that was just released */
	tree -> _nodes = NULL;
	tree -> root   = NULL;
	tree -> count  = 0;
}
@@ -1197,11 +1225,18 @@ top_kdtree_node_t* top_tree_generate_node(global_context_t* ctx, top_kdtree_t* t
		tree->_capacity = new_cap;
	}
	top_kdtree_node_t* ptr = tree -> _nodes + tree -> count;
	ptr -> node_box_lb = (float_t*)malloc(ctx -> dims * sizeof(float_t));
	ptr -> node_box_ub = (float_t*)malloc(ctx -> dims * sizeof(float_t));
	++tree -> count;
	return ptr;
 
}

/*
 * Placeholder for the computation of the bounding boxes of the tree nodes.
 *
 * Currently a no-op: neither the context nor the tree is read or modified.
 */
void compute_boxes(global_context_t* ctx, top_kdtree_t* tree)
{
	/* nothing implemented yet; mark the arguments as deliberately unused */
	(void)ctx;
	(void)tree;
}

void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree_t *tree, int n_bins, float_t tolerance) 
{
	size_t tot_n_points = 0;
@@ -1231,33 +1266,68 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree

	enqueue_partition(&queue, current_partition);
	pointset_t current_pointset;
	current_pointset.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));
	current_pointset.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));

	while (queue.count) 
	{
		/*dequeue the partition to process */
		current_partition = dequeue_partition(&queue);

		/* generate e pointset for that partition */

		get_pointset_from_partition(&current_pointset, &current_partition);
		current_pointset.dims = ctx->dims;
		/* handle partition */
		compute_bounding_box_pointset(ctx, &current_pointset);
		float_t fraction = (current_partition.n_procs / 2) / (float_t)current_partition.n_procs;
		guess_t g = compute_median_pure_binning(ctx, &current_pointset, fraction, selected_dim, n_bins, tolerance);
		int pv = partition_data_around_value(current_pointset.data, ctx->dims, selected_dim, 0, current_pointset.n_points, g.x_guess);

		/*generate a tree node */

		top_kdtree_node_t* current_node = current_node = top_tree_generate_node(ctx, tree);
		/* insert node */
		MPI_DB_PRINT("Handling partition: \n\tcurrent_node %p, \n\tdim %d, \n\tn_points %d, \n\tstart_proc %d, \n\tn_procs %d, \n\tparent %p\n", 
				current_node,
				current_partition.d,
				current_partition.n_points,
				current_partition.start_proc,
				current_partition.n_procs,
				current_partition.parent);
		MPI_DB_PRINT("-------------------\n\n");

		switch (current_partition.lr) {
			case TOP_TREE_LCH:
				if(current_partition.parent)
				{
					current_node -> parent = current_partition.parent;
					current_node -> parent -> lch = current_node;
				}
				break;

		/*
		 *
		 * if points in the pointset is less than exp point per node then create a
		 * leaf actually better to have a margin of x percent around ppn
		 *
		 */
			case TOP_TREE_RCH:
				if(current_partition.parent)
				{
					current_node -> parent = current_partition.parent;
					current_node -> parent -> rch = current_node;
				}
				break;
			default:
				break;
		}

		top_kdtree_node_t* current_node;
		current_node -> data = g.x_guess;
		current_node -> split_dim = selected_dim;
		current_node -> parent = current_partition.parent;
		current_node -> lch = NULL;
		current_node -> rch = NULL;


		size_t points_left = current_partition.n_points * fraction;
		/* handle partition */
		if(current_partition.n_procs > 1)
		{
			float_t fraction = (current_partition.n_procs / 2) / (float_t)current_partition.n_procs;
			guess_t g = compute_median_pure_binning(ctx, &current_pointset, fraction, current_partition.d, n_bins, tolerance);
			int pv = partition_data_around_value(current_pointset.data, ctx->dims, current_partition.d, 0, current_pointset.n_points, g.x_guess);

			current_node -> data = g.x_guess;

			size_t points_left = (size_t)pv;
			size_t points_right = current_partition.n_points - points_left;

			int procs_left = current_partition.n_procs * fraction;
@@ -1267,6 +1337,7 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree
			partition_t left_partition = {
				.n_points = points_left, 
				.n_procs = procs_left,
				.start_proc = current_partition.start_proc,
				.parent = current_node,
				.lr 	= TOP_TREE_LCH,
				.base_ptr = current_pointset.data,
@@ -1276,15 +1347,26 @@ void build_top_kdtree(global_context_t *ctx, pointset_t *og_pointset, top_kdtree
			partition_t right_partition = {
				.n_points = points_right, 
				.n_procs = procs_right,
				.start_proc = current_partition.start_proc + procs_left,
				.parent = current_node,
				.lr 	= TOP_TREE_RCH,
			.base_ptr = current_pointset.data + pv,
				.base_ptr = current_pointset.data + pv * current_pointset.dims,
				.d = next_dimension
			};

		/* get left and right pointset */
			enqueue_partition(&queue, left_partition);
			enqueue_partition(&queue, right_partition);
			MPI_Barrier(ctx -> mpi_communicator);
		}
		else
		{
			current_node -> owner = current_partition.start_proc;
		}
		/* set the root */
		if(current_node -> parent == NULL) tree -> root = current_node;
	}

	MPI_DB_PRINT("Root is %p\n", tree -> root);
	free_queue(&queue);
}

@@ -1365,11 +1447,12 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
	original_ps.data = ctx->local_data;
	original_ps.dims = ctx->dims;
	original_ps.n_points = ctx->local_n_points;
	original_ps.lb_box = NULL;
	original_ps.ub_box = NULL;
	original_ps.lb_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));
	original_ps.ub_box = (float_t*)malloc(ctx -> dims * sizeof(float_t));

	float_t incr = 0.05;
	float_t tol = 0.001;
	/*
	for (int d = 0; d < ctx->dims; ++d) 
	{
		for (float_t f = incr; f < 1; f += incr) 
@@ -1390,13 +1473,15 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
		}
		MPI_DB_PRINT("--------------------------------------\n\n");
	}
	*/

	top_kdtree_t tree;
	top_tree_init(ctx, &tree);
	build_top_kdtree(ctx, &original_ps, &tree, k_global, tol);

	top_tree_free(ctx, &tree);

	// compute_bounding_box(ctx);
	// global_binning_check(ctx, data,d, k_global);
	// retrieve_pc(ctx, global_bin_counts, best_guess, k_global, d, f);
	// check_pc(ctx, best_guess, data, d, f);

	// compute_medians_and_check(ctx,data);

	free(send_counts);
	free(displacements);
+3 −5
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ typedef struct partition_t
{
	int d;
	int n_procs;
	int start_proc;
	size_t n_points;
	float_t* base_ptr;
	int lr;
@@ -41,14 +42,11 @@ typedef struct partition_queue_t
typedef struct top_kdtree_node_t
{
	float_t data;
	//float_t* node_box_lb; //Needed? 
	//float_t* node_box_ub; //Needed?
	float_t* node_box_lb; //Needed? 
	float_t* node_box_ub; //Needed?
	int owner;
	int split_dim;
	int is_leaf;
	size_t n_points;
	float_t* lb_box;
	float_t* ub_box;
	struct top_kdtree_node_t* lch;
	struct top_kdtree_node_t* rch;
	struct top_kdtree_node_t* parent;