Commit aecabbb9 authored by lykos98's avatar lykos98
Browse files

Added some work on a lock-free implementation of H1; still not working as expected, and performance is poor.

parent 5e0399c9
Loading
Loading
Loading
Loading
+40 −32
Original line number Diff line number Diff line
#include "adp.h"
#include <unistd.h>

const border_t border_null = {.density = -1.0, .error = 0, .idx = NOBORDER};
const sparse_border_t sparse_border_null = {.density = -1.0, .error = 0, .idx = NOBORDER, .i = NOBORDER, .j = NOBORDER};
@@ -617,6 +618,8 @@ lock_t h1_lock_acquire(global_context_t* ctx, MPI_Win lock_window, int owner, id
        while(result == LOCK_ACQUIRED && err == MPI_SUCCESS)
        {
            err = MPI_Compare_and_swap(&state, &compare, &result, MPI_LOCK_T, owner, pos, lock_window);
            MPI_Win_flush(owner, lock_window);
            usleep(100);
        }

        if(err != MPI_SUCCESS)
@@ -677,8 +680,7 @@ clusters_t Heuristic1(global_context_t *ctx)
    MPI_Win_fence(0, win_datapoints);
    MPI_Win_lock_all(0,  win_datapoints);

#if defined(THREAD_FUNNELED)
#else
#if !defined(THREAD_FUNNELED)
    #pragma omp parallel for
#endif
    for(idx_t i = 0; i < n; ++i)
@@ -741,14 +743,17 @@ clusters_t Heuristic1(global_context_t *ctx)
    MPI_Win_create(lock_array, n * sizeof(lock_t), sizeof(lock_t), MPI_INFO_NULL, ctx -> mpi_communicator, &win_locks);
    MPI_Win_fence(0, win_locks);

#ifdef EXP_H1
    MPI_Win_lock_all(0, win_to_remove_mask);
    MPI_Win_lock_all(0, win_locks);
#endif

#ifdef EXP_H1
    printf("Using experimental h1\n");
#endif


#if defined(THREAD_FUNNELED)
#else
    #pragma omp parallel for
#if !defined(THREAD_FUNNELED)
    #pragma omp parallel for schedule(dynamic)
#endif
    for(idx_t p = 0; p < n; ++p)
    {
@@ -769,7 +774,9 @@ clusters_t Heuristic1(global_context_t *ctx)
                 *
                 * */

#ifdef EXPERIMENTAL_H1
#ifdef EXP_H1
                #pragma omp critical (h1_exp)
                {
                    int owner = foreign_owner(ctx, jidx);
                    idx_t jpos = jidx - ctx -> rank_idx_start[owner];

@@ -796,9 +803,9 @@ clusters_t Heuristic1(global_context_t *ctx)
                    }

                    state = h1_lock_free(ctx, win_locks, owner, jpos, state);
                }
#else

                #pragma omp critical (h1_centers_elimination)
                #pragma omp critical (centers_elimination)                 
                {
                    int owner = foreign_owner(ctx, jidx);
                    idx_t jpos = jidx - ctx -> rank_idx_start[owner];
@@ -822,13 +829,16 @@ clusters_t Heuristic1(global_context_t *ctx)
                    }

                    MPI_Win_unlock(owner, win_to_remove_mask);
                }
#endif
            }
        }
    }

#ifdef EXP_H1
    MPI_Win_unlock_all(win_to_remove_mask);
    MPI_Win_unlock_all(win_locks);
#endif
    
    MPI_Win_fence(0, win_to_remove_mask);
    MPI_Win_fence(0, win_locks);
@@ -1815,8 +1825,6 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)

        qsort(centers_dp, cluster -> centers.count, sizeof(datapoint_info_t), compare_dp_by_cidx);

        printf("Centers\n");

        master_finds_borders(ctx, cluster, Z, surviving_clusters, centers_dp);
        master_fixes_border_matrix_and_centers(ctx, cluster, Z, old_to_new, surviving_clusters, nclus);
        free(centers_dp);
+0 −47
Original line number Diff line number Diff line
@@ -232,52 +232,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    if (ctx->mpi_rank == 0) 
    {
        data = read_data_file(ctx,ctx -> input_data_file, ctx -> dims, ctx -> input_data_in_float32);
        
        //data = read_data_file(ctx, "../norm_data/50_blobs_more_var.npy", MY_TRUE);
        //ctx->dims = 2;
        //data = read_data_file(ctx, "../norm_data/blobs_small.npy", MY_FALSE);
        //data = read_data_file(ctx, "../norm_data/blobs_small.npy", MY_FALSE);
        // std_g0163178_Me14_091_0000
    
        // 100M points
        // 2D
        // std_g2980844_091_0000
        //data = read_data_file(ctx,"../norm_data/huge_blobs.npy",MY_FALSE);
        // 2B points
        // data = read_data_file(ctx,"../norm_data/very_huge_blobs.npy",MY_FALSE);
        // data = read_data_file(ctx,"../norm_data/hd_blobs.npy",5,MY_FALSE);
        
        //1B points
        // data = read_data_file(ctx,"../norm_data/eds_box_acc_normalized",5,MY_FALSE);
        // data = read_data_file(ctx,"../norm_data/eds_box_6d",6,MY_FALSE);

        // 190M points
        // std_g2980844_091_0000
        // data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",5,MY_TRUE);
        
        /* 1M points ca.*/
        //data = read_data_file(ctx,"../norm_data/std_LR_091_0001",5,MY_TRUE);

        /* BOX */
        // data = read_data_file(ctx,"../norm_data/std_Box_256_30_092_0000",MY_TRUE);

        /* 8M points */
        
        // data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",5,MY_TRUE);

        //88M 
        // data = read_data_file(ctx,"../norm_data/std_g5503149_091_0000",MY_TRUE);

        //
        //34 M
        //data = read_data_file(ctx,"../norm_data/std_g1212639_091_0001",MY_TRUE);
        
        //for weak scalability 
        //ctx->n_points = ctx->n_points / 4;
        //ctx->n_points = (ctx->n_points / 32) * ctx -> world_size;

        get_dataset_diagnostics(ctx, data);

    }
    
    /* communicate the total number of points*/
@@ -320,9 +275,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
                int count_send = MIN(default_msg_len, send_counts[i] - already_sent_points); 
                MPI_Send(data + displacements[i] + already_sent_points, count_send, MPI_MY_FLOAT, i, ctx -> mpi_rank, ctx -> mpi_communicator);
                already_sent_points += count_send;
                //DB_PRINT("[RANK 0] has sent to rank %d %d elements out of %lu\n",i, already_sent_points, send_counts[i]);
            }
            //DB_PRINT("------------------------------------------------\n");
        }
    }
    else
+0 −3
Original line number Diff line number Diff line
@@ -228,9 +228,6 @@ kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* pare
        
    }
    
    


    int median_idx = -1;
    
    //if ((end - start) < 0) return 0;
+4 −3
Original line number Diff line number Diff line
@@ -1478,7 +1478,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
    //ctx -> __local_heap_buffers = (heap_node*)MY_MALLOC(ctx -> local_n_points * k * sizeof(heap_node));
    MPI_Alloc_mem(ctx -> local_n_points * k * sizeof(heap_node), MPI_INFO_NULL, &(ctx -> __local_heap_buffers));

    #pragma omp parallel for
    #pragma omp parallel for schedule(dynamic)
    for(int p = 0; p < ctx -> local_n_points; ++p)
    {
        idx_t idx = local_tree -> _nodes[p].array_idx;
@@ -1663,7 +1663,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
        //if(count_rcv_work_batches[p] > 0)
        {
            //heap_batches_per_node[p] = (heap_node*)MY_MALLOC(k * point_to_rcv_count[p] * sizeof(heap_node));
            #pragma omp parallel for
            #pragma omp parallel for schedule(dynamic)
            for(int batch = 0; batch < point_to_rcv_count[p]; ++batch)
            {
                heap H;
@@ -1822,7 +1822,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
        }

        /* merge lists */
        #pragma omp paralell for
        #pragma omp parallel for
        for(int b = 0; b < ngbh_to_recv[rank_to_recv]; ++b)
        {
            int idx = local_idx_of_the_point[rank_to_recv][b];
@@ -1843,6 +1843,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
        }



        MPI_Barrier(ctx -> mpi_communicator);
    }