Commit bb980b8e authored by lykos98's avatar lykos98
Browse files

added some comments on structs

parent a0caf8af
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
main
sync.sh
leo_sync.sh
lumi_sync.sh
bb
**.ipynb*
scalability_results
+59 −89
Original line number Diff line number Diff line
@@ -817,20 +817,6 @@ clusters_t Heuristic1(global_context_t *ctx)
    MPI_Win_create(to_remove_mask, n * sizeof(heap_node), 1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_to_remove_mask);
    MPI_Win_fence(0, win_to_remove_mask);

    /* 
     * to remove 
     * and to reimplement it using rma
     * for dp in datapoints:
     *  for ngbh in neighbors:
     *   if ngbh is mine:
     *      no_problems, do it as always
     *   else:
     *      ngbh is an external node
     *      do rma things,
     *      in particular, acquire a lock on the window, read and write things
     *      then close
     */

#if defined(THREAD_FUNNELED)
#else
    #pragma omp parallel for
@@ -851,10 +837,8 @@ clusters_t Heuristic1(global_context_t *ctx)
                {
                    /*
                     *
                     * THIS PART CAN BE SUBSTITUTED
                     * we have something like (value, idx) pairs and if we have less than 
                     * MAX_UINT32 particles we can manipulate value and idx to fit into a single
                     * UINT64 and then perform a single atomic max operation
                     * TODO: Implement it without this but using private locks
                     * use an array of locks, and compare and swap to actually gain control of the thing
                     *
                     * */
                    int owner = foreign_owner(ctx, jidx);
@@ -1118,8 +1102,6 @@ clusters_t Heuristic1(global_context_t *ctx)
void Heuristic2(global_context_t* ctx, clusters_t* cluster)
{
    /*
     * Port the current implementation to this using rma
     * then perform the matrix reduction
     *
     * Each one computes its borders, then the borders are shared, and the matrix is 
     * reduced to a single huge matrix of borders
@@ -1659,9 +1641,6 @@ void master_finds_borders(global_context_t* ctx, clusters_t* cluster, float_t Z,
        #define src surviving_clusters[merging_table[m].source]
        #define trg surviving_clusters[merging_table[m].target]

        //int re_check = ( (src != merging_table[m].source) || (trg != merging_table[m].target) );
		//if(re_check)
		{
        /* 
         * Enforce that, in case of a symmetric merging condition, the lowest idx cluster 
         * is merged into the higher idx cluster, only to preserve compatibility with 
@@ -1715,7 +1694,6 @@ void master_finds_borders(global_context_t* ctx, clusters_t* cluster, float_t Z,
        default:
            break;
        }
		}
        
        #undef src
        #undef trg
@@ -1814,14 +1792,6 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)
    /*
     * Heuristic 3, from the paper of Errico, Facco, Laio & Rodriguez
     * ( https://doi.org/10.1016/j.ins.2021.01.010 )
     * Dense implementation: makes use of a dense matrix to store the
     * borders between clusters, so it is more performant when the number of clusters is low.
     *
     * args:
     * - global_context_t* ctx      : global context of the MPI process
     * - clusters_t* cluster        : cluster object storing border info and cluster centers
     * - float_t Z                  : parameter to assess density peak significance
     * - int halo                   : flag to set if you also want to compute the halo points
     */

	#define borders cluster->borders
@@ -1902,6 +1872,8 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)
        clusters_reset(cluster);
    }

    /* broadcast the number of elements on those lists */

    MPI_Bcast(&(cluster -> centers.count), 1, MPI_UINT64_T, 0, ctx -> mpi_communicator);
    MPI_Bcast(&(cluster -> centers.size), 1, MPI_UINT64_T, 0, ctx -> mpi_communicator);

@@ -1992,9 +1964,10 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)
				{
					int cidx = dp_info[i].cluster_idx;
					int halo_flag = dp_info[i].log_rho_c < max_border_den_array[cidx]; 
					//int halo_flag = max_border_den_array[cidx] > dp_info[i].log_rho_c  ; 

                    //changed_here

                    //doing this we can have both the assignment and if it is
                    //part of the halo, without the need for storing other info
                    //halo points have cidx < 0 (old cidx = (c + 1) * -1 )
					dp_info[i].cluster_idx = halo_flag ? (cidx * (-1)) - 1 : cidx;
				}
@@ -2012,9 +1985,6 @@ void Heuristic3(global_context_t* ctx, clusters_t* cluster, float_t Z, int halo)
    free(old_to_new);

    /*free memory and put the correct arrays into place*/
    //free(ipos.data);
    //free(jpos.data);
    //

    #ifdef WRITE_FINAL_ASSIGNMENT
        int* cl = (int*)MY_MALLOC(ctx -> local_n_points * sizeof(int));
+2 −37
Original line number Diff line number Diff line
@@ -17,13 +17,7 @@ void get_context(global_context_t* ctx)
	ctx -> ub_box 	  = NULL;
    ctx -> rank_n_points  = (int*)malloc(ctx -> world_size * sizeof(int));
    ctx -> rank_idx_start = (int*)malloc(ctx -> world_size * sizeof(int));
    ctx -> idx_halo_points_recv = NULL;
    ctx -> idx_halo_points_send = NULL;
    ctx -> n_halo_points_recv = NULL;
    ctx -> n_halo_points_send = NULL;
    ctx -> halo_datapoints  = NULL;
    ctx -> local_datapoints = NULL;
    ctx -> __recv_heap_buffers = NULL;
    ctx -> __local_heap_buffers = NULL;
}

@@ -175,35 +169,6 @@ void free_context(global_context_t* ctx)
    //}

    FREE_NOT_NULL(ctx -> local_datapoints);
    if(ctx -> halo_datapoints)
    {
        for(int i = 0; i < ctx -> world_size; ++i) 
        {
            /*
            for(int j = 0; j < ctx -> n_halo_points_recv[i]; ++j)
            {
                FREE_NOT_NULL(ctx -> halo_datapoints[i][j].ngbh.data);
            }
            */
            FREE_NOT_NULL(ctx -> halo_datapoints[i]);
        }
    }
    FREE_NOT_NULL(ctx -> halo_datapoints);
    FREE_NOT_NULL(ctx -> __recv_heap_buffers);

    if(ctx -> idx_halo_points_recv)
    {
        for(int i = 0; i < ctx -> world_size; ++i) FREE_NOT_NULL(ctx -> idx_halo_points_recv[i]);
    }
    FREE_NOT_NULL(ctx -> idx_halo_points_recv);

    if(ctx -> idx_halo_points_send)
    {
        for(int i = 0; i < ctx -> world_size; ++i) FREE_NOT_NULL(ctx -> idx_halo_points_send[i]);
    }
    FREE_NOT_NULL(ctx -> idx_halo_points_send);
    FREE_NOT_NULL(ctx -> n_halo_points_recv);
    FREE_NOT_NULL(ctx -> n_halo_points_send);
    FREE_NOT_NULL(ctx -> rank_n_points);
    FREE_NOT_NULL(ctx -> rank_idx_start);
}
+48 −45
Original line number Diff line number Diff line
@@ -21,17 +21,6 @@
//#define PRINT_H1_CLUSTER_ASSIGN_COMPLETION
//#define PRINT_ORDERED_BUFFER

/* Per-datapoint record carried through the clustering pipeline. */
typedef struct datapoint_info_t {
    heap ngbh;              /* heap storing the nearest neighbors of this point */
    int is_center;          /* flag signaling if the point is a cluster center */
    int cluster_idx;        /* index of the cluster the point is assigned to */
    idx_t array_idx;        /* global index of the point in the dataset */
    idx_t kstar;            /* kstar value required for the density computation */
    float_t g;              /* density quantity required by the ADP procedure */
    float_t log_rho;        /* log-density estimate -- NOTE(review): inferred from name, confirm */
    float_t log_rho_c;      /* corrected log-density -- TODO confirm */
    float_t log_rho_err;    /* error estimate on log_rho -- TODO confirm */
} datapoint_info_t;

#define MAX(A,B) ((A) > (B) ? (A) : (B))
#define MIN(A,B) ((A) < (B) ? (A) : (B))
@@ -131,55 +120,69 @@ typedef struct datapoint_info_t {
    #define LOG_END
#endif

typedef struct datapoint_info_t {
    /*
     * datapoint object: per-point state carried through the clustering pipeline
     */
    heap ngbh;              //heap object stores nearest neighbors of each point
    int is_center;          //flag signaling if a point is a cluster center
    int cluster_idx;        //cluster index the point is assigned to
    idx_t array_idx;        //global index of the point in the dataset
    idx_t kstar;            //kstar value required for the density computation
    float_t g;              //density quantity, required by the ADP procedure
    float_t log_rho;        //log-density estimate -- NOTE(review): inferred from name, confirm
    float_t log_rho_c;      //corrected log-density -- TODO confirm
    float_t log_rho_err;    //error estimate on log_rho -- TODO confirm
} datapoint_info_t;


struct global_context_t 
typedef struct global_context_t 
{
    /*
     * context object to store info related to each 
     * MPI process
     */
    char processor_mame[MPI_MAX_PROCESSOR_NAME];    //processor name
    int __processor_name_len;                       //processor name len
    int world_size;  
    int mpi_rank;
    int __processor_name_len;
    idx_t k;
	float_t* local_data;
	float_t* lb_box;
	float_t* ub_box;
    int* n_halo_points_recv;
    int* n_halo_points_send;
    idx_t** idx_halo_points_recv;
    idx_t** idx_halo_points_send;
    size_t n_points;
    size_t idx_start;
    size_t local_n_points;
    datapoint_info_t*  local_datapoints;
    datapoint_info_t** halo_datapoints;
    heap_node* __recieved_heap_data;
    uint32_t dims;
    int* rank_idx_start;
    int* rank_n_points;
	char processor_mame[MPI_MAX_PROCESSOR_NAME];
	MPI_Comm mpi_communicator;
    heap_node* __recv_heap_buffers;
    heap_node* __local_heap_buffers;
};

struct pointset_t
    int mpi_rank;                                   //rank of the processor
    uint32_t dims;                                  //number of dimensions of the dataset
    idx_t k;                                        //number of neighbors
	float_t* local_data;                            //pointer to the dataset stored into the node
	float_t* lb_box;                                //bounding box of the dataset
	float_t* ub_box;                                //bounding box of the dataset
    size_t n_points;                                //total number of points
    size_t idx_start;                               //starting index of the points on the processor
    size_t local_n_points;                          //number of points stored in the current processor
    datapoint_info_t*  local_datapoints;            //pointer to the datapoint properties
    int* rank_idx_start;                            //starting index of datapoints in each processor
    int* rank_n_points;                             //number of datapoints held by each processor
    heap_node* __local_heap_buffers;                //buffer that stores nearest neighbors
	MPI_Comm mpi_communicator;                      //mpi communicator
} global_context_t;

typedef struct pointset_t
{
    /*
     * Helper object used to handle top kdtree
     * construction: it represents the portion of
     * the dataset held inside one node of the tree.
     */
	size_t n_points;      //number of points in this set
	size_t __capacity;    //allocated capacity backing `data`
	uint32_t dims;        //dimensionality of each point
	float_t* data;        //point coordinates -- assumes n_points * dims contiguous layout, TODO confirm
	float_t* lb_box;      //lower bound of the bounding box (one entry per dimension)
	float_t* ub_box;      //upper bound of the bounding box (one entry per dimension)
} pointset_t;


struct lu_dynamic_array_t {
typedef struct lu_dynamic_array_t {
  idx_t *data;
  idx_t size;
  idx_t count;
};
} lu_dynamic_array_t;

typedef struct pointset_t pointset_t;
typedef struct global_context_t global_context_t;
typedef struct lu_dynamic_array_t lu_dynamic_array_t;

void mpi_printf(global_context_t*, const char *fmt, ...);
void get_context(global_context_t*);
+23 −4
Original line number Diff line number Diff line
@@ -11,12 +11,28 @@
    #define OUT_CLUSTER_ASSIGN "/beegfs/ftomba/phd/results/final_assignment.npy"
    #define OUT_HALO_FLAGS     "/beegfs/ftomba/phd/results/halo_flags.npy"
    #define OUT_DATA           "/beegfs/ftomba/phd/results/ordered_data.npy"
#else
#endif

#ifdef LEONARDO
    #define OUT_CLUSTER_ASSIGN "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/final_assignment.npy"
    #define OUT_HALO_FLAGS     "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/halo_flags.npy"
    #define OUT_DATA           "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/ordered_data.npy"
#endif

#ifdef LUMI
    #define OUT_CLUSTER_ASSIGN "~/scratch_dadp/out_dadp/final_assignment.npy"
    #define OUT_HALO_FLAGS     "~/scratch_dadp/out_dadp/halo_flags.npy"
    #define OUT_DATA           "~/scratch_dadp/out_dadp/ordered_data.npy"
#endif

#ifndef  OUT_CLUSTER_ASSIGN
    #define OUT_CLUSTER_ASSIGN "final_assignment.npy"
    #define OUT_HALO_FLAGS     "halo_flags.npy"
    #define OUT_DATA           "ordered_data.npy"
#endif



//

#ifdef THREAD_FUNNELED
@@ -25,6 +41,7 @@
    #define THREAD_LEVEL MPI_THREAD_MULTIPLE
#endif


int main(int argc, char** argv) {
    #if defined (_OPENMP)
        int mpi_provided_thread_level;
@@ -54,6 +71,7 @@ int main(int argc, char** argv) {
        MPI_Init(NULL, NULL);
    #endif


    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);
@@ -147,6 +165,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
        
        //1B points
        // data = read_data_file(ctx,"../norm_data/eds_box_acc_normalized",5,MY_FALSE);
        //data = read_data_file(ctx,"../norm_data/eds_box_6d",6,MY_FALSE);

        // 190M points
        // std_g2980844_091_0000
@@ -160,7 +179,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)

        /* 8M points */
        
        // data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",MY_TRUE);
        // data = read_data_file(ctx,"../norm_data/std_g0144846_Me14_091_0001",5,MY_TRUE);

        //88M 
        // data = read_data_file(ctx,"../norm_data/std_g5503149_091_0000",MY_TRUE);
Loading