src/adp/adp.c  (+40 −32)

 #include "adp.h"
+#include <unistd.h>

 const border_t border_null = {.density = -1.0, .error = 0, .idx = NOBORDER};
 const sparse_border_t sparse_border_null = {.density = -1.0, .error = 0, .idx = NOBORDER, .i = NOBORDER, .j = NOBORDER};

@@ -617,6 +618,8 @@ lock_t h1_lock_acquire(global_context_t* ctx, MPI_Win lock_window, int owner, id
     while(result == LOCK_ACQUIRED && err == MPI_SUCCESS)
     {
         err = MPI_Compare_and_swap(&state, &compare, &result, MPI_LOCK_T, owner, pos, lock_window);
         MPI_Win_flush(owner, lock_window);
+        usleep(100);
     }
     if(err != MPI_SUCCESS)

@@ -677,8 +680,7 @@ clusters_t Heuristic1(global_context_t *ctx)
     MPI_Win_fence(0, win_datapoints);
     MPI_Win_lock_all(0, win_datapoints);

-    #if defined(THREAD_FUNNELED)
-    #else
+    #if !defined(THREAD_FUNNELED)
     #pragma omp parallel for
     #endif
     for(idx_t i = 0; i < n; ++i)

@@ -741,14 +743,17 @@ clusters_t Heuristic1(global_context_t *ctx)
     MPI_Win_create(lock_array, n * sizeof(lock_t), sizeof(lock_t), MPI_INFO_NULL, ctx -> mpi_communicator, &win_locks);
     MPI_Win_fence(0, win_locks);

+    #ifdef EXP_H1
+    MPI_Win_lock_all(0, win_to_remove_mask);
+    MPI_Win_lock_all(0, win_locks);
+    #endif

     #ifdef EXP_H1
     printf("Using experimental h1\n");
     #endif

-    #if defined(THREAD_FUNNELED)
-    #else
-    #pragma omp parallel for
+    #if !defined(THREAD_FUNNELED)
+    #pragma omp parallel for schedule(dynamic)
     #endif
     for(idx_t p = 0; p < n; ++p)
     {

@@ -769,7 +774,9 @@ clusters_t Heuristic1(global_context_t *ctx)
      * */
-    #ifdef EXPERIMENTAL_H1
+    #ifdef EXP_H1
+    #pragma omp critical (h1_exp)
     {
         int owner = foreign_owner(ctx, jidx);
         idx_t jpos = jidx - ctx -> rank_idx_start[owner];

@@ -796,9 +803,9 @@ clusters_t Heuristic1(global_context_t *ctx)
         }
         state = h1_lock_free(ctx, win_locks, owner, jpos, state);
     }
     #else
-    #pragma omp critical (h1_centers_elimination)
+    #pragma omp critical (centers_elimination)
     {
         int owner = foreign_owner(ctx, jidx);
         idx_t jpos = jidx - ctx -> rank_idx_start[owner];

@@ -822,13 +829,16 @@ clusters_t Heuristic1(global_context_t *ctx)
         }
         MPI_Win_unlock(owner, win_to_remove_mask);
     }
     #endif
     }
     }
     }

+    #ifdef EXP_H1
+    MPI_Win_unlock_all(win_to_remove_mask);
+    MPI_Win_unlock_all(win_locks);
+    #endif

     MPI_Win_fence(0, win_to_remove_mask);
     MPI_Win_fence(0, win_locks);

@@ -1815,8 +1825,6 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)
     qsort(centers_dp, cluster -> centers.count, sizeof(datapoint_info_t), compare_dp_by_cidx);

-    printf("Centers\n");
-
     master_finds_borders(ctx, cluster, Z, surviving_clusters, centers_dp);
     master_fixes_border_matrix_and_centers(ctx, cluster, Z, old_to_new, surviving_clusters, nclus);

     free(centers_dp);
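Two things are going on in this file. First, the usleep(100) added to h1_lock_acquire turns the remote compare-and-swap spin into a backoff loop, so a rank waiting on a contended lock does not flood the owner with RMA traffic. Second, the EXP_H1 path brackets the whole elimination loop in a single passive-target epoch (MPI_Win_lock_all / MPI_Win_unlock_all) instead of the per-access MPI_Win_lock / MPI_Win_unlock pairs of the #else branch; the THREAD_FUNNELED guards compile out the omp parallel for because under MPI_THREAD_FUNNELED only the thread that initialized MPI may make MPI calls. Below is a minimal sketch of the acquire pattern, assuming lock_t is int and LOCK_FREE / LOCK_ACQUIRED are 0 / 1 (the repository's real definitions may differ):

/* Sketch of the CAS spin-with-backoff this diff modifies. The window
 * must already be in a passive-target epoch (MPI_Win_lock_all) for
 * MPI_Compare_and_swap / MPI_Win_flush to be legal here. */
#include <mpi.h>
#include <unistd.h>

typedef int lock_t;                  /* assumed; repo may differ      */
#define MPI_LOCK_T    MPI_INT
#define LOCK_FREE     ((lock_t)0)
#define LOCK_ACQUIRED ((lock_t)1)

static int spin_acquire(MPI_Win lock_window, int owner, MPI_Aint pos)
{
    lock_t state   = LOCK_ACQUIRED;  /* value to install remotely     */
    lock_t compare = LOCK_FREE;      /* swap only if currently free   */
    lock_t result  = LOCK_ACQUIRED;  /* forces at least one attempt   */
    int err = MPI_SUCCESS;

    while (result == LOCK_ACQUIRED && err == MPI_SUCCESS)
    {
        /* atomically: if lock[pos] == LOCK_FREE, set it to
         * LOCK_ACQUIRED; result receives the pre-swap value */
        err = MPI_Compare_and_swap(&state, &compare, &result, MPI_LOCK_T,
                                   owner, pos, lock_window);
        MPI_Win_flush(owner, lock_window); /* complete the RMA op     */
        usleep(100);   /* the added backoff between retries           */
    }
    return err;  /* on success, result == LOCK_FREE: we hold the lock */
}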
src/main/main.c  (+0 −47)

@@ -232,52 +232,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
     if (ctx->mpi_rank == 0)
     {
         data = read_data_file(ctx, ctx -> input_data_file, ctx -> dims, ctx -> input_data_in_float32);
-        //data = read_data_file(ctx, "../norm_data/50_blobs_more_var.npy", MY_TRUE);
-        //ctx->dims = 2;
-        //data = read_data_file(ctx, "../norm_data/blobs_small.npy", MY_FALSE);
-        //data = read_data_file(ctx, "../norm_data/blobs_small.npy", MY_FALSE);
-        // std_g0163178_Me14_091_0000 // 100M points // 2D
-        // std_g2980844_091_0000
-        //data = read_data_file(ctx,"../norm_data/huge_blobs.npy",MY_FALSE); // 2B points
-        // data = read_data_file(ctx,"../norm_data/very_huge_blobs.npy",MY_FALSE);
-        // data = read_data_file(ctx,"../norm_data/hd_blobs.npy",5,MY_FALSE); //1B points
-        // data = read_data_file(ctx,"../norm_data/eds_box_acc_normalized",5,MY_FALSE);
-        // data = read_data_file(ctx,"../norm_data/eds_box_6d",6,MY_FALSE); // 190M points
-        // std_g2980844_091_0000
-        // data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",5,MY_TRUE); /* 1M points ca.*/
-        //data = read_data_file(ctx,"../norm_data/std_LR_091_0001",5,MY_TRUE); /* BOX */
-        // data = read_data_file(ctx,"../norm_data/std_Box_256_30_092_0000",MY_TRUE); /* 8M points */
-        // data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",5,MY_TRUE); //88M
-        // data = read_data_file(ctx,"../norm_data/std_g5503149_091_0000",MY_TRUE); // //34 M
-        //data = read_data_file(ctx,"../norm_data/std_g1212639_091_0001",MY_TRUE);
-        //for weak scalability
-        //ctx->n_points = ctx->n_points / 4;
-        //ctx->n_points = (ctx->n_points / 32) * ctx -> world_size;
         get_dataset_diagnostics(ctx, data);
     }
     /* communicate the total number of points*/

@@ -320,9 +275,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
                 int count_send = MIN(default_msg_len, send_counts[i] - already_sent_points);
                 MPI_Send(data + displacements[i] + already_sent_points, count_send, MPI_MY_FLOAT, i, ctx -> mpi_rank, ctx -> mpi_communicator);
                 already_sent_points += count_send;
-                //DB_PRINT("[RANK 0] has sent to rank %d %d elements out of %lu\n",i, already_sent_points, send_counts[i]);
             }
-            //DB_PRINT("------------------------------------------------\n");
         }
         else
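The surviving lines in the second hunk are the interior of rank 0's chunked scatter: each destination's slice is streamed in messages of at most default_msg_len elements, which matters because MPI_Send takes an int count and a slice can exceed INT_MAX elements. A self-contained sketch of that pattern (the enclosing loop sits outside this hunk, so its exact shape, the chunk cap, and the use of MPI_FLOAT in place of the repo's MPI_MY_FLOAT are assumptions):

/* Hypothetical helper mirroring the chunked-send pattern in
 * simulate_master_read_and_scatter; not the repository's code. */
#include <mpi.h>
#include <stddef.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static void send_slice_chunked(const float *data, size_t displacement,
                               size_t slice_len, int dest, MPI_Comm comm)
{
    const size_t default_msg_len = 1u << 24;   /* assumed chunk cap  */
    size_t already_sent_points = 0;
    while (already_sent_points < slice_len)
    {
        /* never exceed int range in a single MPI_Send */
        int count_send = (int)MIN(default_msg_len,
                                  slice_len - already_sent_points);
        MPI_Send(data + displacement + already_sent_points, count_send,
                 MPI_FLOAT, dest, 0 /* tag */, comm);
        already_sent_points += count_send;
    }
}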
src/tree/kdtreeV2.c  (+0 −3)

@@ -228,9 +228,6 @@ kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* pare
     }

     int median_idx = -1;
-    //if ((end - start) < 0) return 0;
src/tree/tree.c  (+4 −3)

@@ -1478,7 +1478,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
     //ctx -> __local_heap_buffers = (heap_node*)MY_MALLOC(ctx -> local_n_points * k * sizeof(heap_node));
     MPI_Alloc_mem(ctx -> local_n_points * k * sizeof(heap_node), MPI_INFO_NULL, &(ctx -> __local_heap_buffers));

-    #pragma omp parallel for
+    #pragma omp parallel for schedule(dynamic)
     for(int p = 0; p < ctx -> local_n_points; ++p)
     {
         idx_t idx = local_tree -> _nodes[p].array_idx;

@@ -1663,7 +1663,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
         //if(count_rcv_work_batches[p] > 0)
         {
             //heap_batches_per_node[p] = (heap_node*)MY_MALLOC(k * point_to_rcv_count[p] * sizeof(heap_node));
-            #pragma omp parallel for
+            #pragma omp parallel for schedule(dynamic)
             for(int batch = 0; batch < point_to_rcv_count[p]; ++batch)
             {
                 heap H;

@@ -1822,7 +1822,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
     /* merge lists */
-    #pragma omp paralell for
+    #pragma omp parallel for
     for(int b = 0; b < ngbh_to_recv[rank_to_recv]; ++b)
     {
         int idx = local_idx_of_the_point[rank_to_recv][b];

@@ -1843,6 +1843,7 @@ void mpi_ngbh_search(global_context_t* ctx, datapoint_info_t* dp_info, top_kdtre
     }

+    MPI_Barrier(ctx -> mpi_communicator);
 }
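Two notes on these hunks. The "paralell" fix is more than cosmetic: compilers silently ignore unrecognized pragmas (at most emitting a -Wunknown-pragmas warning), so the merge loop at 1822 had been running single-threaded. And schedule(dynamic) fits the kNN loops because per-point tree searches descend to very different depths, so iteration costs are skewed; a static schedule pins each thread to a fixed block and leaves the others idle behind the unlucky one. A minimal, self-contained illustration of that effect (not the repo's code; costs are synthetic):

/* Demo: dynamic scheduling under skewed iteration costs.
 * Build with: cc -fopenmp sched_demo.c -o sched_demo */
#include <stdio.h>
#include <omp.h>

static double work(int p)   /* cost grows with p, like a kNN query   */
{                           /* landing in a dense region of the tree */
    double s = 0.0;
    for (long j = 0; j < 10000L * (long)(p + 1); ++j)
        s += 1.0 / (double)(j + 1);
    return s;
}

int main(void)
{
    double acc = 0.0;
    double t0 = omp_get_wtime();
    /* swap schedule(dynamic) for schedule(static) to compare timings:
     * dynamic hands iterations to threads as they become free */
    #pragma omp parallel for schedule(dynamic) reduction(+ : acc)
    for (int p = 0; p < 2048; ++p)
        acc += work(p);
    printf("sum=%f elapsed=%.3fs\n", acc, omp_get_wtime() - t0);
    return 0;
}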