Commit 88d661af authored by Francesco Tomba's avatar Francesco Tomba
Browse files

got 10d 1B

parent c629603d
Loading
Loading
Loading
Loading
+9 −4
Original line number Diff line number Diff line
@@ -31,13 +31,18 @@ export PSM2_MQ_RECVREQS_MAX=268435456
rm bb/*
mkdir bb

OUT_ASSIGNMENT=/leonardo_scratch/large/userexternal/ftomba00/assignment
OUT_DATA=/leonardo_scratch/large/userexternal/ftomba00/data

OUT_DIR=/leonardo_scratch/large/userexternal/ftomba00/exp1M
IN_DATA=/leonardo_work/EUHPC_D18_045

rm -rf ${OUT_DIR}
mkdir ${OUT_DIR}


#10^6 points 
time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK}  ./main -t f32 -k 100 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}
time mpirun -n ${SLURM_NTASKS} --map-by ppr:1:socket:PE=${SLURM_CPUS_PER_TASK}  ./main -t f32 -k 100 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -o ${OUT_DIR}


## to modify those one below
#time mpirun -n ${SLURM_NTASKS} --map-by core  ./main -t f32 -i ${IN_DATA}/norm_data/std_LR_091_0001 -d 5 -a ${OUT_ASSIGNMENT} -o ${OUT_DATA}

#34 * 10^6 points
+1 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ void get_context(global_context_t* ctx)
    ctx -> local_datapoints = NULL;
    ctx -> __local_heap_buffers = NULL;
    ctx -> input_data_in_float32 = -1;
    ctx -> local_tree_type = KD;
    ctx -> dims = 0;
    ctx -> k = 300;
    ctx -> z = 3;
+6 −4
Original line number Diff line number Diff line
@@ -11,16 +11,16 @@
//#include <stdarg.h>

// #define PARALLEL_FIX_BORDERS
#define WRITE_SHUFFLED_DATA
#define WRITE_NGBH
// #define WRITE_SHUFFLED_DATA
// #define WRITE_NGBH
// #define WRITE_TOP_NODES
#define WRITE_DENSITY
// #define WRITE_DENSITY
// #define WRITE_CLUSTER_ASSIGN_H1
// #define WRITE_BORDERS
// #define WRITE_CENTERS_PRE_MERGING
// #define WRITE_MERGES_INFO
// #define WRITE_MERGING_TABLE
#define WRITE_FINAL_ASSIGNMENT
// #define WRITE_FINAL_ASSIGNMENT

// #define PRINT_NGBH_EXCHANGE_SCHEME
// #define PRINT_H2_COMM_SCHEME
@@ -162,6 +162,7 @@ typedef struct datapoint_info_t {
    float_t log_rho_err;    //
} datapoint_info_t;

enum local_tree_type {VP, KD};

typedef struct global_context_t 
{
@@ -188,6 +189,7 @@ typedef struct global_context_t
    idx_t* og_idxs;                                 //original indexes
    heap_node_t* __local_heap_buffers;              //buffer that stores nearest neighbors
	MPI_Comm mpi_communicator;                      //mpi communicator
    enum local_tree_type local_tree_type;           //local tree strategy
    int input_data_in_float32;
    char input_data_file[DEFAULT_STR_LEN];
    char output_assignment_file[DEFAULT_STR_LEN];
+150 −72
Original line number Diff line number Diff line
@@ -8,29 +8,6 @@
#include <unistd.h>
#include <getopt.h>


#ifdef AMONRA
    #pragma message "Hi, you are on amonra"
    #define OUT_CLUSTER_ASSIGN "/beegfs/ftomba/phd/results/final_assignment.npy"
    #define OUT_DATA           "/beegfs/ftomba/phd/results/ordered_data.npy"
#endif

#ifdef LEONARDO
    #define OUT_CLUSTER_ASSIGN "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/final_assignment.npy"
    #define OUT_DATA           "/leonardo_scratch/large/userexternal/ftomba00/out_dadp/ordered_data.npy"
#endif

#ifdef LUMI
    #define OUT_CLUSTER_ASSIGN "~/scratch_dadp/out_dadp/final_assignment.npy"
    #define OUT_DATA           "~/scratch_dadp/out_dadp/ordered_data.npy"
#endif

#ifndef  OUT_CLUSTER_ASSIGN
    #define OUT_CLUSTER_ASSIGN "final_assignment.npy"
    #define OUT_DATA           "ordered_data.npy"
#endif


#ifdef THREAD_FUNNELED
    #define THREAD_LEVEL MPI_THREAD_FUNNELED
#else
@@ -43,10 +20,10 @@ struct option long_options[] =
    {"in-data" , required_argument, 0, 'i'},
    {"in-dtype", required_argument, 0, 't'},
    {"in-dims" , required_argument, 0, 'd'},
    {"out-data", optional_argument, 0, 'o'},
    {"out-assignment", optional_argument, 0, 'a'},
    {"out-directory", required_argument, 0, 'o'},
    {"kngbh", optional_argument, 0, 'k'},
    {"z", optional_argument, 0, 'z'},
    {"knn-strategy", optional_argument, 0, 's'},
    {"help", optional_argument, 0, 'h'},
    {0, 0, 0, 0}
};
@@ -58,12 +35,10 @@ const char* help = "Distributed Advanced Density Peak\n"\
                   "    -d --in-dims        (required) number of dimensions of the data file, dadp expects something\n"\
                   "                                   of the form N x d where N is inferred from the lenght of the\n"\
                   "                                   data file\n"\
                   "    -o --out-data       (optional) output path for the data, the datafile is shuffled between\n"\
                   "                                   mpi tasks and datapoints are ordered default is `out_data` \n"\
                   "    -a --out-assignment (optional) output path for the cluster assignment output ranges [0 ... Nc - 1]\n"\
                   "                                   for core points halo points have indices [-Nc ... -1] conversion\n"\
                   "                                   of idx for an halo point is cluster_idx = -halo_idx - 1, default is `out_assignment`\n"\
                   "    -o --out-directory  (required) output path for the data, cluster assignment and original indices  \n"\
                   "                                   of the datapoints. Produces files like `data.[rank]`, `og_idx.[rank]` and `assignment.[rank]`\n"\
                   "    -k --kngbh          (optional) number of nearest neighbors to compute\n"\
                   "    -s --knn-strategy   (optional) strategy for local knn (allowed values 'kd' for kdtree, 'vp' for ball-tree (vantage point)\n"\
                   "    -z --z              (optional) number of nearest neighbors to compute\n";

void parse_args(global_context_t* ctx, int argc, char** argv)
@@ -72,10 +47,8 @@ void parse_args(global_context_t* ctx, int argc, char** argv)
    int opt;
    int input_file_set = 0;
    int input_type_set = 0;
    snprintf(ctx -> output_assignment_file, DEFAULT_STR_LEN, "%s", OUT_CLUSTER_ASSIGN);
    snprintf(ctx -> output_data_file, DEFAULT_STR_LEN, "%s", OUT_DATA);

    while((opt = getopt_long(argc, argv, "i:t:d:o:a:k:z:h", long_options, NULL)) != -1)
    while((opt = getopt_long(argc, argv, "i:t:d:o:k:z:hs:", long_options, NULL)) != -1)
    {
        switch(opt)
        {
@@ -105,9 +78,6 @@ void parse_args(global_context_t* ctx, int argc, char** argv)
            case 'o':
                strncpy(ctx -> output_data_file, optarg, DEFAULT_STR_LEN);
                break;
            case 'a':
                strncpy(ctx -> output_assignment_file, optarg, DEFAULT_STR_LEN);
                break;
            case 'k':
                ctx -> k = atoi(optarg);
                break;
@@ -119,6 +89,22 @@ void parse_args(global_context_t* ctx, int argc, char** argv)
                MPI_Finalize();
                exit(0);
            
            case 's':
                if(strncmp("vp", optarg, 2) == 0)
                {
                    ctx -> local_tree_type = VP;
                }
                else if(strncmp("kd", optarg, 2) == 0)
                {
                    ctx -> local_tree_type = KD;
                }
                else
                {
                    printf("Cannot find valid local tree type selection, exiting\n");
                    exit(0);
                }
                break;

            default:
                mpi_printf(ctx, "%s\n", help);
                MPI_Finalize();
@@ -161,8 +147,7 @@ void print_hello(global_context_t* ctx)
    mpi_printf(ctx, "Data file  .............> %s\n", ctx -> input_data_file);
    mpi_printf(ctx, "Input Type .............> float%d\n", ctx -> input_data_in_float32 ? 32 : 64);
    mpi_printf(ctx, "Dimensions .............> %d\n", ctx -> dims);
    mpi_printf(ctx, "Output data file .......> %s\n", ctx -> output_data_file);
    mpi_printf(ctx, "Output assignment file -> %s\n", ctx -> output_assignment_file);
    mpi_printf(ctx, "Output directory .......> %s\n", ctx -> output_data_file);
    mpi_printf(ctx, "k ......................> %lu\n", ctx -> k);
    mpi_printf(ctx, "Z ......................> %.2lf\n", ctx -> z);
    mpi_printf(ctx, "\nRUNNING!\n");
@@ -255,15 +240,33 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    int halo = MY_TRUE;
    float_t tol = 0.002;

    if(I_AM_MASTER && ctx -> world_size <= 6)
    if(ctx -> world_size <= 6)
    {
        if(I_AM_MASTER)
        {
        test_file_path(ctx -> output_data_file);
        test_file_path(ctx -> output_assignment_file);
            char out_file[200];

            snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file);
            test_file_path(out_file);

            snprintf(out_file, 200, "%s/assignment", ctx->output_data_file);
            test_file_path(out_file);

            snprintf(out_file, 200, "%s/data", ctx->output_data_file);
            test_file_path(out_file);
        }
    }
    else
    {
        test_distributed_file_path(ctx, ctx -> output_data_file);
        test_distributed_file_path(ctx, ctx -> output_assignment_file);
        char out_file[200];
        snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file);
        test_distributed_file_path(ctx, out_file);

        snprintf(out_file, 200, "%s/assignment", ctx->output_data_file);
        test_distributed_file_path(ctx, out_file);

        snprintf(out_file, 200, "%s/data", ctx->output_data_file);
        test_distributed_file_path(ctx, out_file);
    }
    

@@ -358,10 +361,6 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    elapsed_time = TIME_STOP;
    LOG_WRITE("Top kdtree build and domain decomposition", elapsed_time);

    TIME_START;
    kdtree_t local_tree;
    kdtree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims);

    datapoint_info_t* dp_info = (datapoint_info_t*)MY_MALLOC(ctx -> local_n_points * sizeof(datapoint_info_t));            
    for(uint64_t i = 0; i < ctx -> local_n_points; ++i)
    {
@@ -378,22 +377,64 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    }
    ctx -> local_datapoints = dp_info;

    TIME_START;

    void* opaque_local_tree;

    switch(ctx -> local_tree_type)
    {
        case KD:
            {
                kdtree_t local_tree;
                kdtree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims);


                build_tree_kdtree_parallel(&local_tree);
                // build_tree_kdtree_sample(&local_tree);
                ctx -> local_data = local_tree.data;
                
                elapsed_time = TIME_STOP;
    LOG_WRITE("Local trees init and build", elapsed_time);
                LOG_WRITE("Local kdtrees init and build", elapsed_time);

                TIME_START;
                MPI_DB_PRINT("----- Performing ngbh search -----\n");
                MPI_Barrier(ctx -> mpi_communicator);

                mpi_all_knn_search_kdtree(ctx, dp_info, &tree, &local_tree, ctx -> k);

                MPI_Barrier(ctx -> mpi_communicator);
                elapsed_time = TIME_STOP;
                LOG_WRITE("Total time for all knn search w. local kdtrees", elapsed_time);
                opaque_local_tree = &local_tree;
            }
            break;
        case VP:
            {
                vptree_t local_tree;
                vptree_initialize( &local_tree, ctx -> local_data, ctx -> local_n_points, (unsigned int)ctx -> dims);


                build_tree_vptree(&local_tree);
                // build_tree_kdtree_sample(&local_tree);
                ctx -> local_data = local_tree.data;

                elapsed_time = TIME_STOP;
                LOG_WRITE("Vptrees init and build", elapsed_time);

                TIME_START;
                MPI_DB_PRINT("----- Performing ngbh search -----\n");
                MPI_Barrier(ctx -> mpi_communicator);

    mpi_all_knn_search(ctx, dp_info, &tree, &local_tree, ctx -> k);
                mpi_all_knn_search_vptree(ctx, dp_info, &tree, &local_tree, ctx -> k);

                MPI_Barrier(ctx -> mpi_communicator);
                elapsed_time = TIME_STOP;
    LOG_WRITE("Total time for all knn search", elapsed_time)
                LOG_WRITE("Total time for all knn search w. vptrees", elapsed_time);
                opaque_local_tree = &local_tree;
            }
            break;
    }



    TIME_START;
@@ -434,40 +475,77 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)

    
    TIME_START;
    int* cl = (int*)MY_MALLOC(ctx -> local_n_points * sizeof(int));
    int* cl = (int*)MY_MALLOC(2 * ctx -> local_n_points * sizeof(int));
    float_t* data_to_write = (float_t*)MY_MALLOC(ctx -> local_n_points * ctx -> k * sizeof(float_t));

    switch(ctx -> local_tree_type)
    {
        case KD:
            {
                kdtree_t local_tree = *((kdtree_t*)opaque_local_tree);
                for(int i = 0; i < ctx -> local_n_points; ++i)
                {
                    cl[i] = ctx -> local_datapoints[i].cluster_idx;
                    idx_t idx = local_tree.__points[i].array_idx;
                    memcpy(data_to_write + idx*ctx -> dims, local_tree.__points[i].data, ctx -> dims*sizeof(float_t));
                }
                kdtree_free(&local_tree);
            }
            break;

        case VP:
            {
                vptree_t local_tree = *((vptree_t*)opaque_local_tree);
                for(int i = 0; i < ctx -> local_n_points; ++i)
                {
                    cl[i] = ctx -> local_datapoints[i].cluster_idx;
                    idx_t idx = local_tree.__points[i].array_idx;
                    memcpy(data_to_write + idx*ctx -> dims, local_tree.__points[i].data, ctx -> dims*sizeof(float_t));
                }
                vptree_free(&local_tree);
            }
            break;

        default:
            break;
    }

    if(ctx -> world_size <= 32)
    {
        big_ordered_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, ctx -> output_assignment_file);
        big_ordered_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, ctx -> output_data_file);
        char out_file[200];

        free(cl);
        snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file);
        big_ordered_buffer_to_file(ctx, ctx->og_idxs, sizeof(idx_t), ctx -> local_n_points, out_file);

        snprintf(out_file, 200, "%s/assignment", ctx->output_data_file);
        big_ordered_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, out_file);

        snprintf(out_file, 200, "%s/data", ctx->output_data_file);
        big_ordered_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, out_file);
    }
    else
    {
        distributed_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, ctx -> output_assignment_file);
        distributed_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, ctx -> output_data_file);
        char out_file[200];

        free(cl);
        snprintf(out_file, 200, "%s/og_idx", ctx->output_data_file);
        distributed_buffer_to_file(ctx, ctx->og_idxs, sizeof(idx_t), ctx -> local_n_points, out_file);

        snprintf(out_file, 200, "%s/assignment", ctx->output_data_file);
        distributed_buffer_to_file(ctx, cl, sizeof(int), ctx -> local_n_points, out_file);

        snprintf(out_file, 200, "%s/data", ctx->output_data_file);
        distributed_buffer_to_file(ctx, data_to_write, sizeof(double), ctx -> local_n_points * ctx -> dims, out_file);
    }



    elapsed_time = TIME_STOP;
    LOG_WRITE("Write results to file", elapsed_time);
    
    
    free(cl);
    free(data_to_write);
    top_tree_free(ctx, &tree);
    kdtree_free(&local_tree);
    //clusters_free(&clusters);

    free(send_counts);
+10 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@
#include <time.h>
#include <float.h>

#define DEFAULT_LEAF_SIZE 32
#define DEFAULT_LEAF_SIZE 256

#define ALIGNMENT 64
#define CHECK_ALLOCATION_NO_CTX(x) if(!x){printf("[!!!] Failed allocation: %s at line %d \n", __FILE__, __LINE__ ); exit(1);}
@@ -49,6 +49,15 @@
    #define MAX(x,y) ((x > y) ? (x) : (y))
#endif

#ifndef POINT
    #define POINT
    typedef struct point_t
    {
        idx_t    array_idx;
        float_t* data;
    } point_t;
#endif

typedef struct 
{
    float_t* lb;
Loading