Commit a0caf8af authored by lykos98's avatar lykos98
Browse files

added distributed output if nodes > 6, tested up to 1B points

parent 25dce9c1
Loading
Loading
Loading
Loading
+3 −68
Original line number Diff line number Diff line
@@ -727,7 +727,7 @@ void compute_correction(global_context_t* ctx, float_t Z)

}

clusters_t Heuristic1(global_context_t *ctx, int verbose)
clusters_t Heuristic1(global_context_t *ctx)
{
    /*
     * Heuristic 1, from the paper of Errico, Facco, Laio & Rodriguez 
@@ -740,15 +740,8 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)
    struct timespec start_tot, finish_tot;
    double elapsed_tot;

	if(verbose)
	{
		printf("H1: Preliminary cluster assignment\n");
		clock_gettime(CLOCK_MONOTONIC, &start_tot);
	}
    
    TIME_DEF;


    lu_dynamic_array_t all_centers, removed_centers, actual_centers, max_rho;

    lu_dynamic_array_allocate(&all_centers);
@@ -758,15 +751,6 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)

    datapoint_info_t** dp_info_ptrs = (datapoint_info_t**)MY_MALLOC(n*sizeof(datapoint_info_t*));

    struct timespec start, finish;
    double elapsed;


    if(verbose)
	{
		clock_gettime(CLOCK_MONOTONIC, &start);
	}

    /* proceed */

    MPI_Win win_datapoints;
@@ -779,7 +763,6 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)
#else
    #pragma omp parallel for
#endif

    for(idx_t i = 0; i < n; ++i)
    {   
        /*
@@ -813,15 +796,6 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)
        }
    }

    if(verbose)
	{
		clock_gettime(CLOCK_MONOTONIC, &finish);
		elapsed = (finish.tv_sec - start.tv_sec);
		elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
		printf("\tFinding putative centers: %.3lfs\n",elapsed);
		clock_gettime(CLOCK_MONOTONIC, &start);
	}

	/* 
	 * optimized version
	 *
@@ -843,9 +817,6 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)
    MPI_Win_create(to_remove_mask, n * sizeof(heap_node), 1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_to_remove_mask);
    MPI_Win_fence(0, win_to_remove_mask);




    /* 
     * to remove 
     * and to reimplement it using rma
@@ -963,15 +934,6 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)
    MPI_Win_free(&win_to_remove_mask);
	free(to_remove_mask);

    if(verbose)
	{
		clock_gettime(CLOCK_MONOTONIC, &finish);
		elapsed = (finish.tv_sec - start.tv_sec);
		elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
		printf("\tFinding actual centers:   %.3lfs\n",elapsed);
		clock_gettime(CLOCK_MONOTONIC, &start);
	}

    int n_centers = (int)actual_centers.count;
    int tot_centers;
    MPI_Allreduce(&n_centers, &tot_centers, 1, MPI_INT, MPI_SUM, ctx -> mpi_communicator);
@@ -983,7 +945,6 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)
     * then re-scatter them around to get unique cluster labels */ 

    center_t* private_centers_buffer = (center_t*)MY_MALLOC(actual_centers.count * sizeof(center_t));

    center_t* global_centers_buffer  = (center_t*)MY_MALLOC(tot_centers * sizeof(center_t));

    for(int i = 0; i < actual_centers.count; ++i)
@@ -1119,15 +1080,6 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)

    MPI_Barrier(ctx -> mpi_communicator);

    if(verbose)
	{
		clock_gettime(CLOCK_MONOTONIC, &finish);
		elapsed = (finish.tv_sec - start.tv_sec);
		elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
		printf("\tTentative clustering:     %.3lfs\n",elapsed);
		clock_gettime(CLOCK_MONOTONIC, &start);
	}

    free(dp_info_ptrs);
    free(max_rho.data);
    free(removed_centers.data);
@@ -1154,27 +1106,10 @@ clusters_t Heuristic1(global_context_t *ctx, int verbose)
    c_all.centers = actual_centers;



    if(verbose)
	{
		clock_gettime(CLOCK_MONOTONIC, &finish);
		elapsed = (finish.tv_sec - start.tv_sec);
		elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
		printf("\tFinalizing clustering:    %.3lfs\n",elapsed);
		printf("\n");
	}

    clock_gettime(CLOCK_MONOTONIC, &finish_tot);
    elapsed_tot = (finish_tot.tv_sec - start_tot.tv_sec);
    elapsed_tot += (finish_tot.tv_nsec - start_tot.tv_nsec) / 1000000000.0;


	if(verbose)
	{
		printf("\tFound %lu clusters\n",(uint64_t)actual_centers.count);
		printf("\tTotal time: %.3lfs\n\n", elapsed_tot);
	}

    c_all.n = n;
    return c_all;
}
@@ -2060,8 +1995,8 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)
					//int halo_flag = max_border_den_array[cidx] > dp_info[i].log_rho_c  ; 

                    //changed_here
					//dp_info[i].cluster_idx = halo_flag ? -1 : cidx;
					dp_info[i].halo_flag = halo_flag;
                    //halo points have cidx < 0 (old cidx = (c + 1) * -1 )
					dp_info[i].cluster_idx = halo_flag ? (cidx * (-1)) - 1 : cidx;
				}
			}
			free(max_border_den_array);
+1 −1
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int
float_t compute_ID_two_NN_ML(global_context_t* ctx, datapoint_info_t* dp_info, idx_t n, int verbose);
void clusters_allocate(clusters_t * c, int s);

clusters_t Heuristic1(global_context_t *ctx, int verbose);
clusters_t Heuristic1(global_context_t *ctx);
void Heuristic2(global_context_t* ctx, clusters_t* cluster);
void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo);
void clusters_free(clusters_t * c);
+4 −5
Original line number Diff line number Diff line
@@ -22,16 +22,15 @@
//#define PRINT_ORDERED_BUFFER

typedef struct datapoint_info_t {
    idx_t array_idx;
    heap ngbh;
    int is_center;
    int cluster_idx;
    idx_t array_idx;
    idx_t kstar;
    float_t g;
    float_t log_rho;
    float_t log_rho_c;
    float_t log_rho_err;
    idx_t kstar;
    int is_center;
    int cluster_idx;
    int halo_flag;
} datapoint_info_t;

#define MAX(A,B) ((A) > (B) ? (A) : (B))
+8 −23
Original line number Diff line number Diff line
@@ -104,6 +104,9 @@ int main(int argc, char** argv) {

void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx) 
{
    /* TODO
     *
     */
    float_t *data;
    TIME_DEF
    double elapsed_time;
@@ -117,13 +120,11 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    {
        test_file_path(OUT_DATA);
        test_file_path(OUT_CLUSTER_ASSIGN);
        if(halo) test_file_path(OUT_HALO_FLAGS);
    }
    else
    {
        test_distributed_file_path(ctx, OUT_DATA);
        test_distributed_file_path(ctx, OUT_CLUSTER_ASSIGN);
        if(halo) test_distributed_file_path(ctx, OUT_HALO_FLAGS);
    }


@@ -149,10 +150,10 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)

        // 190M points
        // std_g2980844_091_0000
        // data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",5,MY_TRUE);
        data = read_data_file(ctx,"../norm_data/std_g2980844_091_0000",5,MY_TRUE);
        
        /* 1M points ca.*/
        data = read_data_file(ctx,"../norm_data/std_LR_091_0001",5,MY_TRUE);
        // data = read_data_file(ctx,"../norm_data/std_LR_091_0001",5,MY_TRUE);

        /* BOX */
        // data = read_data_file(ctx,"../norm_data/std_Box_256_30_092_0000",MY_TRUE);
@@ -289,7 +290,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
        dp_info[i].kstar = -1;
        dp_info[i].is_center = -1;
        dp_info[i].cluster_idx = -1;
        dp_info[i].halo_flag = 0;
        //dp_info[i].halo_flag = 0;
    }
    ctx -> local_datapoints = dp_info;

@@ -326,7 +327,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    LOG_WRITE("Density estimate", elapsed_time)

    TIME_START;
    clusters_t clusters = Heuristic1(ctx, MY_FALSE);
    clusters_t clusters = Heuristic1(ctx);
    elapsed_time = TIME_STOP;
    LOG_WRITE("H1", elapsed_time)

@@ -352,14 +353,6 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
        big_ordered_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, OUT_CLUSTER_ASSIGN);
        big_ordered_buffer_to_file(ctx, ctx -> local_data, sizeof(double), ctx -> local_n_points * ctx -> dims, OUT_DATA);

        if(halo)
        {
            int* halo_flags = (int*)MY_MALLOC(ctx -> local_n_points * sizeof(int));
            for(int i = 0; i < ctx -> local_n_points; ++i) cl[i] = ctx -> local_datapoints[i].halo_flag;
            big_ordered_buffer_to_file(ctx, halo_flags, sizeof(int), ctx -> local_n_points, OUT_HALO_FLAGS);
            free(halo_flags);
        }

        free(cl);

    }
@@ -368,14 +361,6 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
        distributed_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, OUT_CLUSTER_ASSIGN);
        distributed_buffer_to_file(ctx, ctx -> local_data, sizeof(double), ctx -> local_n_points * ctx -> dims, OUT_DATA);

        if(halo)
        {
            int* halo_flags = (int*)MY_MALLOC(ctx -> local_n_points * sizeof(int));
            for(int i = 0; i < ctx -> local_n_points; ++i) cl[i] = ctx -> local_datapoints[i].halo_flag;
            distributed_buffer_to_file(ctx, halo_flags, sizeof(int), ctx -> local_n_points, OUT_HALO_FLAGS);
            free(halo_flags);
        }

        free(cl);

    }