Commit 7ce795f0 authored by lykos98's avatar lykos98
Browse files

added ML estimation of ID

parent 9ae06572
Loading
Loading
Loading
Loading
+136 −21

File changed.

Preview size limit exceeded, changes collapsed.

+45 −3
Original line number Diff line number Diff line
@@ -2449,6 +2449,47 @@ float_t mEst2(float_t * x, float_t *y, idx_t n)
    return num/den;
}

float_t compute_ID_two_NN_ML(global_context_t* ctx, datapoint_info_t* dp_info, idx_t n, int verbose)
{

    /*
     * Maximum-likelihood estimate of the intrinsic dimension (ID) of a
     * dataset from the ratio between the neighbour values stored in
     * slots 2 and 1 of each point's neighbour list:
     *
     *      intrinsic_dim = (N - 1) / sum_i log(mu_i)
     *
     * args:
     * - ctx:     global context; provides the communicator used for the
     *            reduction and n_points, which is used as N
     * - dp_info: array of structs holding, for each local point, its
     *            neighbour list (ngbh.data)
     * - n:       number of entries of dp_info to process on this rank
     * - verbose: if non-zero, prints the estimate and the elapsed time
     *
     * returns: the estimated ID, identical on every rank of the communicator
     *
     * This is a collective call: it performs an MPI_Allreduce on
     * ctx -> mpi_communicator, so every rank of that communicator must call it.
     *
     * NOTE(review): log(mu_i) is computed as 0.5 * log(v2 / v1), which
     * suggests the stored values are squared distances -- confirm.
     *
     * Points whose neighbour values are not strictly positive (for instance
     * duplicated points at distance zero) are skipped and removed from N.
     * Otherwise a single inf/NaN term would propagate through the reduction
     * and corrupt the estimate on every rank. If no usable point is left the
     * final division has a zero denominator and the result is not finite.
     */

    struct timespec start_tot, finish_tot;

    if(verbose)
    {
        printf("ID estimation:\n");
        clock_gettime(CLOCK_MONOTONIC, &start_tot);
    }

    /* [0]: local sum of log(mu_i), [1]: number of local points skipped */
    float_t local_acc[2] = {0, 0};
    for(idx_t i = 0; i < n; ++i)
    {
        float_t first_nn  = dp_info[i].ngbh.data[1].value;
        float_t second_nn = dp_info[i].ngbh.data[2].value;

        /* written as !(x > 0) so that NaN inputs are rejected as well */
        if(!(first_nn > 0) || !(second_nn > 0))
        {
            local_acc[1] += 1;
            continue;
        }

        local_acc[0] += 0.5 * log(second_nn / first_nn);
    }

    /* a single reduction carries both the sum and the skipped count */
    float_t global_acc[2] = {0, 0};
    MPI_Allreduce(local_acc, global_acc, 2, MPI_MY_FLOAT, MPI_SUM, ctx -> mpi_communicator);

    /* with nothing skipped this is exactly (N - 1) / sum(log_mus) */
    float_t d = ((ctx -> n_points - 1) - global_acc[1]) / global_acc[0];

    if(verbose)
    {
        clock_gettime(CLOCK_MONOTONIC, &finish_tot);
        double elapsed_tot = (finish_tot.tv_sec - start_tot.tv_sec);
        elapsed_tot += (finish_tot.tv_nsec - start_tot.tv_nsec) / 1000000000.0;
        if(global_acc[1] > 0)
        {
            printf("\tSkipped degenerate points: %.0lf\n", (double)global_acc[1]);
        }
        printf("\tID value: %.6lf\n", d);
        printf("\tTotal time: %.3lfs\n\n", elapsed_tot);
    }

    return d;

}


float_t id_estimate(global_context_t* ctx, datapoint_info_t* dp_info, idx_t n, float_t fraction, int verbose)
@@ -2710,7 +2751,7 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)

        //ctx -> n_points = 48*5*2000;
        ctx->n_points = ctx->n_points / ctx->dims;
        ctx->n_points = (ctx->n_points * 0.1) / 10;
        //ctx->n_points = (ctx->n_points * 0.1) / 10;
        // ctx -> n_points = ctx -> world_size * 1000;

        //ctx -> n_points = 10000000 * ctx -> world_size;
@@ -2822,11 +2863,12 @@ void simulate_master_read_and_scatter(int dims, size_t n, global_context_t *ctx)
    LOG_WRITE("Finding points to request the ngbh", elapsed_time)

    TIME_START;
    float_t id = id_estimate(ctx, dp_info, ctx -> local_n_points, 0.9, MY_FALSE);
    //float_t id = id_estimate(ctx, dp_info, ctx -> local_n_points, 0.9, MY_FALSE);
    float_t id = compute_ID_two_NN_ML(ctx, dp_info, ctx -> local_n_points, MY_FALSE);
    elapsed_time = TIME_STOP;
    //id = 3.920865231328582;
    //id = 4.008350298212649;
    id = 4.;
    //id = 4.;
    LOG_WRITE("ID estimate", elapsed_time)

    MPI_DB_PRINT("ID %lf \n",id);