Commit 92c5e4a9 authored by lykos98's avatar lykos98
Browse files

added slimmified heap

parent 77a1df52
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -17,9 +17,9 @@ The suggestion is to run it with one mpi task per socket.

 - [x] ~~arugment parser~~
 - [x] ~~H2: graph reduction~~
 - [x] ~~kdtree: implement slim heap~~
 - [ ] prettify overall stdout
 - [ ] H1: implementation of lock free centers elimination
 - [ ] kdtree: implement slim heap
 - [ ] kdtree: optimization an profiling
 - [ ] io: curation of IO using mpi IO or other solutions 
+20 −23
Original line number Diff line number Diff line
@@ -55,7 +55,7 @@ float_t compute_ID_two_NN_ML(global_context_t* ctx, datapoint_info_t* dp_info, i
    float_t log_mus = 0.;
    for(idx_t i = 0; i < n; ++i)
    {
        log_mus += 0.5 * log(dp_info[i].ngbh.data[2].value/dp_info[i].ngbh.data[1].value);
        log_mus += 0.5 * log(dp_info[i].ngbh[2].value/dp_info[i].ngbh[1].value);
    }

    float_t d = 0;
@@ -82,7 +82,7 @@ float_t get_j_ksel_dist(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win expo
    if(owner == ctx -> mpi_rank)
    {
        idx_t pos = j - ctx -> idx_start;
        return ctx -> local_datapoints[pos].ngbh.data[ksel].value;
        return ctx -> local_datapoints[pos].ngbh[ksel].value;
    }
    else
    {
@@ -103,7 +103,7 @@ idx_t get_j_ksel_idx(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win exposed
    if(owner == ctx -> mpi_rank)
    {
        idx_t pos = j - ctx -> idx_start;
        return ctx -> local_datapoints[pos].ngbh.data[ksel].array_idx;
        return ctx -> local_datapoints[pos].ngbh[ksel].array_idx;
    }
    else
    {
@@ -132,7 +132,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int


    MPI_Barrier(ctx -> mpi_communicator);
    idx_t k = ctx -> local_datapoints[0].ngbh.N;
    idx_t k = ctx -> k;

    struct timespec start_tot, finish_tot;
    double elapsed_tot;
@@ -145,7 +145,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
		clock_gettime(CLOCK_MONOTONIC, &start_tot);
	}

    idx_t kMAX = ctx -> local_datapoints[0].ngbh.N - 1;   
    idx_t kMAX = ctx -> k - 1;   

    float_t omega = 0.;  
    if(sizeof(float_t) == sizeof(float)){ omega = powf(PI_F,d/2)/tgammaf(d/2.0f + 1.0f);}  
@@ -174,7 +174,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
    {
        for(idx_t k = 0; k <= kMAX; ++k)
        {
            local_datapoints[i].ngbh.data[k].value = omega * pow(local_datapoints[i].ngbh.data[k].value, d/2.);  
            local_datapoints[i].ngbh[k].value = omega * pow(local_datapoints[i].ngbh[k].value, d/2.);  
        }
        //initialize kstar at 0
        local_datapoints[i].kstar = 0;
@@ -191,8 +191,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
            //request data
            idx_t ksel = j - 1;

#if defined(THREAD_FUNNELED)
#else
#if !defined(THREAD_FUNNELED)
            #pragma omp parallel for
#endif

@@ -203,7 +202,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
                {
                    //vvi = omega * pow(local_datapoints[i].ngbh.data[ksel].value,d/2.);

                    idx_t jj = local_datapoints[i].ngbh.data[j].array_idx;
                    idx_t jj = local_datapoints[i].ngbh[j].array_idx;

                    /* 
                     * note jj can be an halo point 
@@ -215,7 +214,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int

                    if(owner == ctx -> mpi_rank)
                    {
                        scratch_heap_nodes[i] = ctx -> local_datapoints[pos].ngbh.data[ksel];
                        scratch_heap_nodes[i] = ctx -> local_datapoints[pos].ngbh[ksel];
                    }
                    else
                    {
@@ -234,8 +233,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int

            MPI_Win_fence(MPI_MODE_NOPUT,exposed_ngbh); 
            //process data
#if defined(THREAD_FUNNELED)
#else
#if !defined(THREAD_FUNNELED)
            #pragma omp parallel for
#endif
            for(idx_t i = 0; i < ctx -> local_n_points; ++i)
@@ -243,7 +241,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
                if(ctx -> local_datapoints[i].kstar == 0)
                {
                    float_t vvi, vvj, vp, dL;
                    vvi = local_datapoints[i].ngbh.data[ksel].value;
                    vvi = local_datapoints[i].ngbh[ksel].value;
                    vvj = scratch_heap_nodes[i].value;
                    vp = (vvi + vvj)*(vvi + vvj);
                    dL = -2.0 * ksel * log(4.*vvi*vvj/vp);
@@ -278,7 +276,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
            {
                idx_t k = kMAX - 1;

                float_t vvi = ctx -> local_datapoints[i].ngbh.data[k].value;
                float_t vvi = ctx -> local_datapoints[i].ngbh[k].value;

                local_datapoints[i].kstar = k;
                local_datapoints[i].log_rho = log((float_t)(k)/vvi/((float_t)(ctx -> n_points)));
@@ -326,7 +324,7 @@ float_t get_j_ksel_dist_v2(global_context_t* ctx, idx_t i, idx_t j, idx_t ksel,
        if(owner == ctx -> mpi_rank)
        {
            idx_t pos = j - ctx -> idx_start;
            return ctx -> local_datapoints[pos].ngbh.data[ksel].value;
            return ctx -> local_datapoints[pos].ngbh[ksel].value;
        }
        else
        {
@@ -646,10 +644,9 @@ clusters_t Heuristic1(global_context_t *ctx)
        dp_info[i].is_center = 1;
        dp_info[i].cluster_idx = -1;
        //printf("%lf\n",p -> g);
        heap i_ngbh = dp_info[i].ngbh;
        for(idx_t k = 1; k < maxk; ++k)
        {
            idx_t ngbh_index = i_ngbh.data[k].array_idx;
            idx_t ngbh_index = dp_info[i].ngbh[k].array_idx;
            datapoint_info_t dj = find_possibly_halo_datapoint_rma(ctx, ngbh_index, win_datapoints);
            float_t gj = dj.g;
            if(gj > gi){
@@ -697,7 +694,7 @@ clusters_t Heuristic1(global_context_t *ctx)

        for(idx_t j = 1; j < i_point.kstar + 1; ++j)
        {
            idx_t jidx = i_point.ngbh.data[j].array_idx; 
            idx_t jidx = i_point.ngbh[j].array_idx; 

            datapoint_info_t j_point = find_possibly_halo_datapoint_rma(ctx, jidx,  win_datapoints);

@@ -863,7 +860,7 @@ clusters_t Heuristic1(global_context_t *ctx)
                int cluster = -1;
                idx_t k = 0;
                idx_t p_idx;
                idx_t max_k = p -> ngbh.N;
                idx_t max_k = ctx -> k;
                /*assign each particle at the same cluster as the nearest particle of higher density*/
                datapoint_info_t p_retrieved;
                while( k < max_k - 1 && cluster == -1)
@@ -871,7 +868,7 @@ clusters_t Heuristic1(global_context_t *ctx)


                    ++k;
                    p_idx = p -> ngbh.data[k].array_idx;
                    p_idx = p -> ngbh[k].array_idx;
                    p_retrieved = find_possibly_halo_datapoint_rma(ctx, p_idx, win_datapoints);

                    int flag = p_retrieved.g > p -> g;
@@ -890,7 +887,7 @@ clusters_t Heuristic1(global_context_t *ctx)
                    idx_t gm_index = SIZE_MAX;
                    for(idx_t k = 0; k < max_k; ++k)
                    {
                        idx_t ngbh_index = p -> ngbh.data[k].array_idx;
                        idx_t ngbh_index = p -> ngbh[k].array_idx;
                        for(idx_t m = 0; m < removed_centers.count; ++m)
                        {
                            idx_t max_rho_idx = max_rho.data[m];
@@ -1006,7 +1003,7 @@ void Heuristic2(global_context_t* ctx, clusters_t* cluster)
	}

    idx_t nclus = cluster->centers.count; 
    idx_t max_k = dp_info[0].ngbh.N;
    idx_t max_k = ctx -> k;

    for(idx_t i = 0; i < n; ++i)
    {
@@ -1018,7 +1015,7 @@ void Heuristic2(global_context_t* ctx, clusters_t* cluster)
            for(idx_t k = 1; k < dp_info[i].kstar + 1; ++k)
            {
                /*index of the kth ngbh of n*/
                idx_t j = dp_info[i].ngbh.data[k].array_idx;
                idx_t j = dp_info[i].ngbh[k].array_idx;
                pp = NOBORDER;
                /*Loop over kn neigbhours to find if n is the nearest*/
                /*if cluster of the particle in nbhg is c then check is neighborhood*/                                                
+1 −1
Original line number Diff line number Diff line
@@ -125,7 +125,7 @@ typedef struct datapoint_info_t {
    /*
     * datapoint object 
     */
    heap ngbh;              //heap object stores nearest neighbors of each point
    heap_node* ngbh;        //heap object stores nearest neighbors of each point
    int is_center;          //flag signaling if a point is a cluster center
    int cluster_idx;        //cluster index
    idx_t array_idx;        //global index of the point in the dataset
+1 −3
Original line number Diff line number Diff line
@@ -384,9 +384,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    datapoint_info_t* dp_info = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t));            
    for(uint64_t i = 0; i < ctx -> local_n_points; ++i)
    {
        dp_info[i].ngbh.data = NULL;
        dp_info[i].ngbh.N = 0;
        dp_info[i].ngbh.count = 0;
        dp_info[i].ngbh = NULL;
        dp_info[i].g = 0.f;
        dp_info[i].log_rho = 0.f;
        dp_info[i].log_rho_c = 0.f;
+20 −9
Original line number Diff line number Diff line
@@ -1483,17 +1483,19 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
    {
        idx_t idx = local_tree -> _nodes[p].array_idx;
        /* actually we want to preserve the heap to then insert guesses from other nodes */
        dp_info[idx].ngbh.data  = ctx -> __local_heap_buffers + k * idx;
        dp_info[idx].ngbh.count = 0;
        dp_info[idx].ngbh.N     = k;
        heap tmp_heap;
        tmp_heap.data  = ctx -> __local_heap_buffers + k * idx;
        tmp_heap.count = 0;
        tmp_heap.N     = k;

        //dp_info[idx].ngbh = knn_kdtree_v2_no_heapsort(local_tree -> _nodes[p].data, local_tree -> root, k);
        knn_sub_tree_search_kdtree_v2(local_tree -> _nodes[p].data, local_tree -> root, &(dp_info[idx].ngbh));
        knn_sub_tree_search_kdtree_v2(local_tree -> _nodes[p].data, local_tree -> root, &tmp_heap);

        convert_heap_idx_to_global(ctx, &(dp_info[idx].ngbh));
        convert_heap_idx_to_global(ctx, &tmp_heap);
        dp_info[idx].cluster_idx = -1;
        dp_info[idx].is_center = 0;
        dp_info[idx].array_idx = idx + ctx -> idx_start;
        dp_info[idx].ngbh = tmp_heap.data;
    }
    elapsed_time = TIME_STOP;
    LOG_WRITE("Local neighborhood search", elapsed_time);
@@ -1517,7 +1519,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
    #pragma omp parallel for
    for(int i = 0; i < ctx -> local_n_points; ++i)
    {
        float_t max_dist = dp_info[i].ngbh.data[0].value;
        float_t max_dist = dp_info[i].ngbh[0].value;
        float_t* point   = ctx -> local_data + (i * ctx -> dims);
        
        tree_walk_v2_find_n_points(ctx, top_tree -> root, i, max_dist, point, point_to_snd_capacity);
@@ -1537,7 +1539,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
    #pragma omp parallel for
    for(int i = 0; i < ctx -> local_n_points; ++i)
    {
        float_t max_dist = dp_info[i].ngbh.data[0].value;
        float_t max_dist = dp_info[i].ngbh[0].value;
        float_t* point   = ctx -> local_data + (i * ctx -> dims);

        tree_walk_v2_append_points(ctx, top_tree -> root, i, max_dist, point, data_to_send_per_proc, local_idx_of_the_point, point_to_snd_count);
@@ -1832,7 +1834,11 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
            /* insert the points into the heap */
            for(int j = 0; j < k; ++j)
            {
                insert_max_heap(&(dp_info[idx].ngbh), H.data[j].value, H.data[j].array_idx);
                heap tmp_heap;
                tmp_heap.N     = k;
                tmp_heap.count = k;
                tmp_heap.data  = dp_info[idx].ngbh;
                insert_max_heap(&(tmp_heap), H.data[j].value, H.data[j].array_idx);
            }
        }

@@ -1858,7 +1864,12 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
    #pragma omp parallel for
    for(int i = 0; i < ctx -> local_n_points; ++i)
    {
        heap_sort(&(dp_info[i].ngbh));
        heap tmp_heap;
        tmp_heap.N     = k;
        tmp_heap.count = k;
        tmp_heap.data  = dp_info[i].ngbh;

        heap_sort(&(tmp_heap));
    }

    elapsed_time = TIME_STOP;