Loading src/adp/adp.c +6 −136 Original line number Diff line number Diff line Loading @@ -116,142 +116,6 @@ idx_t get_j_ksel_idx(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win exposed } } void compute_density_kstarnn_rma(global_context_t* ctx, const float_t d, int verbose){ /* * Point density computation: * args: * - paricles: array of structs * - d : intrinsic dimension of the dataset * - points : number of points in the dataset */ MPI_Info info; MPI_Barrier(ctx -> mpi_communicator); idx_t k = ctx -> local_datapoints[0].ngbh.N; struct timespec start_tot, finish_tot; double elapsed_tot; datapoint_info_t* local_datapoints = ctx -> local_datapoints; if(verbose) { printf("Density and k* estimation:\n"); clock_gettime(CLOCK_MONOTONIC, &start_tot); } idx_t kMAX = ctx -> local_datapoints[0].ngbh.N - 1; float_t omega = 0.; if(sizeof(float_t) == sizeof(float)){ omega = powf(PI_F,d/2)/tgammaf(d/2.0f + 1.0f);} else{omega = pow(M_PI,d/2.)/tgamma(d/2.0 + 1.0);} MPI_Win exposed_ngbh; MPI_Win_create( ctx -> __local_heap_buffers, ctx -> local_n_points * k * sizeof(heap_node), 1, MPI_INFO_NULL, ctx -> mpi_communicator, &exposed_ngbh); MPI_Win_fence(0, exposed_ngbh); MPI_Win_lock_all(0, exposed_ngbh); MPI_Barrier(ctx -> mpi_communicator); #pragma omp parallel for for(idx_t i = 0; i < ctx -> local_n_points; ++i) { for(idx_t k = 0; k <= kMAX; ++k) { local_datapoints[i].ngbh.data[k].value = omega * pow(local_datapoints[i].ngbh.data[k].value, d/2.); } } for(idx_t i = 0; i < ctx -> local_n_points; ++i) { idx_t j = 4; idx_t k; float_t dL = 0.; float_t vvi = 0.; float_t vvj = 0.; float_t vp = 0.; while(j < kMAX && dL < DTHR) { idx_t ksel = j - 1; //vvi = omega * pow(local_datapoints[i].ngbh.data[ksel].value,d/2.); vvi = local_datapoints[i].ngbh.data[ksel].value; idx_t jj = local_datapoints[i].ngbh.data[j].array_idx; /* * note jj can be an halo point * need to search maybe for it in foreign nodes * */ //float_t dist_jj = get_j_ksel_dist(ctx, jj, ksel, exposed_ngbh); //vvj = omega * pow(dist_jj,d/2.); vvj = get_j_ksel_dist(ctx, jj, ksel, exposed_ngbh); vp = (vvi + vvj)*(vvi + vvj); dL = -2.0 * ksel * log(4.*vvi*vvj/vp); j = j + 1; } if(j == kMAX) { k = j - 1; //vvi = omega * pow(ctx -> local_datapoints[i].ngbh.data[k].value,d/2.); vvi = ctx -> local_datapoints[i].ngbh.data[k].value; } else { k = j - 2; } local_datapoints[i].kstar = k; local_datapoints[i].log_rho = log((float_t)(k)/vvi/((float_t)(ctx -> n_points))); //dp_info[i].log_rho = log((float_t)(k)) - log(vvi) -log((float_t)(points)); local_datapoints[i].log_rho_err = 1.0/sqrt((float_t)k); //(float_t)(-Q_rsqrt((float)k)); local_datapoints[i].g = local_datapoints[i].log_rho - local_datapoints[i].log_rho_err; } if(verbose) { clock_gettime(CLOCK_MONOTONIC, &finish_tot); elapsed_tot = (finish_tot.tv_sec - start_tot.tv_sec); elapsed_tot += (finish_tot.tv_nsec - start_tot.tv_nsec) / 1000000000.0; printf("\tTotal time: %.3lfs\n\n", elapsed_tot); } MPI_Win_unlock_all(exposed_ngbh); MPI_Win_fence(0, exposed_ngbh); MPI_Win_free(&exposed_ngbh); #if defined(WRITE_DENSITY) /* densities */ float_t* den = (float_t*)MY_MALLOC(ctx -> local_n_points * sizeof(float_t)); float_t* gs = (float_t*)MY_MALLOC(ctx -> local_n_points * sizeof(float_t)); idx_t* ks = (idx_t*)MY_MALLOC(ctx -> local_n_points * sizeof(idx_t)); for(int i = 0; i < ctx -> local_n_points; ++i) den[i] = ctx -> local_datapoints[i].log_rho; for(int i = 0; i < ctx -> local_n_points; ++i) ks[i] = ctx -> local_datapoints[i].kstar; for(int i = 0; i < ctx -> local_n_points; ++i) gs[i] = ctx -> local_datapoints[i].g; ordered_buffer_to_file(ctx, den, sizeof(float_t), ctx -> local_n_points, "bb/ordered_density.npy"); ordered_buffer_to_file(ctx, ks, sizeof(idx_t), ctx -> local_n_points, "bb/ks.npy"); ordered_buffer_to_file(ctx, gs, sizeof(float_t), ctx -> local_n_points, "bb/g.npy"); ordered_data_to_file(ctx, "bb/ordered_data.npy"); free(den); free(ks); #endif return; } void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int verbose){ Loading Loading @@ -288,6 +152,12 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int else{omega = pow(M_PI,d/2.)/tgamma(d/2.0 + 1.0);} // [LT comment] it may be the case to promote this win to a global variable // and to leave it open until it is needed (then also for the next // heuristics // Also, shall we give som info argument to it ? // MPI_Win exposed_ngbh; MPI_Win_create( ctx -> __local_heap_buffers, ctx -> local_n_points * k * sizeof(heap_node), Loading src/tree/heap.h +3 −0 Original line number Diff line number Diff line Loading @@ -40,6 +40,9 @@ struct heap_node struct heap { // [LT comment] is it possible to drop this N in favor of a global variable, // when the tree is compiled to be used in adP? // if N is constant, then we can save 8 bytes here idx_t N; idx_t count; struct heap_node* data; Loading Loading
src/adp/adp.c +6 −136 Original line number Diff line number Diff line Loading @@ -116,142 +116,6 @@ idx_t get_j_ksel_idx(global_context_t* ctx, idx_t j, idx_t ksel, MPI_Win exposed } } void compute_density_kstarnn_rma(global_context_t* ctx, const float_t d, int verbose){ /* * Point density computation: * args: * - paricles: array of structs * - d : intrinsic dimension of the dataset * - points : number of points in the dataset */ MPI_Info info; MPI_Barrier(ctx -> mpi_communicator); idx_t k = ctx -> local_datapoints[0].ngbh.N; struct timespec start_tot, finish_tot; double elapsed_tot; datapoint_info_t* local_datapoints = ctx -> local_datapoints; if(verbose) { printf("Density and k* estimation:\n"); clock_gettime(CLOCK_MONOTONIC, &start_tot); } idx_t kMAX = ctx -> local_datapoints[0].ngbh.N - 1; float_t omega = 0.; if(sizeof(float_t) == sizeof(float)){ omega = powf(PI_F,d/2)/tgammaf(d/2.0f + 1.0f);} else{omega = pow(M_PI,d/2.)/tgamma(d/2.0 + 1.0);} MPI_Win exposed_ngbh; MPI_Win_create( ctx -> __local_heap_buffers, ctx -> local_n_points * k * sizeof(heap_node), 1, MPI_INFO_NULL, ctx -> mpi_communicator, &exposed_ngbh); MPI_Win_fence(0, exposed_ngbh); MPI_Win_lock_all(0, exposed_ngbh); MPI_Barrier(ctx -> mpi_communicator); #pragma omp parallel for for(idx_t i = 0; i < ctx -> local_n_points; ++i) { for(idx_t k = 0; k <= kMAX; ++k) { local_datapoints[i].ngbh.data[k].value = omega * pow(local_datapoints[i].ngbh.data[k].value, d/2.); } } for(idx_t i = 0; i < ctx -> local_n_points; ++i) { idx_t j = 4; idx_t k; float_t dL = 0.; float_t vvi = 0.; float_t vvj = 0.; float_t vp = 0.; while(j < kMAX && dL < DTHR) { idx_t ksel = j - 1; //vvi = omega * pow(local_datapoints[i].ngbh.data[ksel].value,d/2.); vvi = local_datapoints[i].ngbh.data[ksel].value; idx_t jj = local_datapoints[i].ngbh.data[j].array_idx; /* * note jj can be an halo point * need to search maybe for it in foreign nodes * */ //float_t dist_jj = get_j_ksel_dist(ctx, jj, ksel, exposed_ngbh); //vvj = omega * pow(dist_jj,d/2.); vvj = get_j_ksel_dist(ctx, jj, ksel, exposed_ngbh); vp = (vvi + vvj)*(vvi + vvj); dL = -2.0 * ksel * log(4.*vvi*vvj/vp); j = j + 1; } if(j == kMAX) { k = j - 1; //vvi = omega * pow(ctx -> local_datapoints[i].ngbh.data[k].value,d/2.); vvi = ctx -> local_datapoints[i].ngbh.data[k].value; } else { k = j - 2; } local_datapoints[i].kstar = k; local_datapoints[i].log_rho = log((float_t)(k)/vvi/((float_t)(ctx -> n_points))); //dp_info[i].log_rho = log((float_t)(k)) - log(vvi) -log((float_t)(points)); local_datapoints[i].log_rho_err = 1.0/sqrt((float_t)k); //(float_t)(-Q_rsqrt((float)k)); local_datapoints[i].g = local_datapoints[i].log_rho - local_datapoints[i].log_rho_err; } if(verbose) { clock_gettime(CLOCK_MONOTONIC, &finish_tot); elapsed_tot = (finish_tot.tv_sec - start_tot.tv_sec); elapsed_tot += (finish_tot.tv_nsec - start_tot.tv_nsec) / 1000000000.0; printf("\tTotal time: %.3lfs\n\n", elapsed_tot); } MPI_Win_unlock_all(exposed_ngbh); MPI_Win_fence(0, exposed_ngbh); MPI_Win_free(&exposed_ngbh); #if defined(WRITE_DENSITY) /* densities */ float_t* den = (float_t*)MY_MALLOC(ctx -> local_n_points * sizeof(float_t)); float_t* gs = (float_t*)MY_MALLOC(ctx -> local_n_points * sizeof(float_t)); idx_t* ks = (idx_t*)MY_MALLOC(ctx -> local_n_points * sizeof(idx_t)); for(int i = 0; i < ctx -> local_n_points; ++i) den[i] = ctx -> local_datapoints[i].log_rho; for(int i = 0; i < ctx -> local_n_points; ++i) ks[i] = ctx -> local_datapoints[i].kstar; for(int i = 0; i < ctx -> local_n_points; ++i) gs[i] = ctx -> local_datapoints[i].g; ordered_buffer_to_file(ctx, den, sizeof(float_t), ctx -> local_n_points, "bb/ordered_density.npy"); ordered_buffer_to_file(ctx, ks, sizeof(idx_t), ctx -> local_n_points, "bb/ks.npy"); ordered_buffer_to_file(ctx, gs, sizeof(float_t), ctx -> local_n_points, "bb/g.npy"); ordered_data_to_file(ctx, "bb/ordered_data.npy"); free(den); free(ks); #endif return; } void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int verbose){ Loading Loading @@ -288,6 +152,12 @@ void compute_density_kstarnn_rma_v2(global_context_t* ctx, const float_t d, int else{omega = pow(M_PI,d/2.)/tgamma(d/2.0 + 1.0);} // [LT comment] it may be the case to promote this win to a global variable // and to leave it open until it is needed (then also for the next // heuristics // Also, shall we give som info argument to it ? // MPI_Win exposed_ngbh; MPI_Win_create( ctx -> __local_heap_buffers, ctx -> local_n_points * k * sizeof(heap_node), Loading
src/tree/heap.h +3 −0 Original line number Diff line number Diff line Loading @@ -40,6 +40,9 @@ struct heap_node struct heap { // [LT comment] is it possible to drop this N in favor of a global variable, // when the tree is compiled to be used in adP? // if N is constant, then we can save 8 bytes here idx_t N; idx_t count; struct heap_node* data; Loading