Commit 04c3417b authored by lykos98's avatar lykos98
Browse files

added working implementation of h1 optimization

parent 792e2512
Loading
Loading
Loading
Loading
+40 −15
Original line number Original line Diff line number Diff line
@@ -510,8 +510,6 @@ datapoint_info_t find_possibly_halo_datapoint_rma(global_context_t* ctx, idx_t i
    else
    else
    {
    {
        datapoint_info_t tmp_dp;
        datapoint_info_t tmp_dp;
        #pragma omp critical
        {
        idx_t i = idx - ctx -> rank_idx_start[owner];
        idx_t i = idx - ctx -> rank_idx_start[owner];
        MPI_Request request;
        MPI_Request request;
        MPI_Status status;
        MPI_Status status;
@@ -520,8 +518,6 @@ datapoint_info_t find_possibly_halo_datapoint_rma(global_context_t* ctx, idx_t i
                i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request);
                i * sizeof(datapoint_info_t), sizeof(datapoint_info_t), MPI_BYTE, win_datapoints, &request);
        MPI_Wait(&request, MPI_STATUS_IGNORE);
        MPI_Wait(&request, MPI_STATUS_IGNORE);


        }

        return tmp_dp;         
        return tmp_dp;         
    }                 
    }                 
}
}
@@ -680,9 +676,11 @@ clusters_t Heuristic1(global_context_t *ctx)


    struct timespec start_tot, finish_tot;
    struct timespec start_tot, finish_tot;
    double elapsed_tot;
    double elapsed_tot;
    double elapsed_time;


    TIME_DEF;
    TIME_DEF;


    TIME_START;
    lu_dynamic_array_t all_centers, removed_centers, actual_centers, max_rho;
    lu_dynamic_array_t all_centers, removed_centers, actual_centers, max_rho;


    lu_dynamic_array_allocate(&all_centers);
    lu_dynamic_array_allocate(&all_centers);
@@ -698,7 +696,7 @@ clusters_t Heuristic1(global_context_t *ctx)
    MPI_Win_create(ctx -> local_datapoints, ctx -> local_n_points * sizeof(datapoint_info_t), 
    MPI_Win_create(ctx -> local_datapoints, ctx -> local_n_points * sizeof(datapoint_info_t), 
                   1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_datapoints);
                   1, MPI_INFO_NULL, ctx -> mpi_communicator, &win_datapoints);
    MPI_Win_fence(0, win_datapoints);
    MPI_Win_fence(0, win_datapoints);
    MPI_Win_lock_all(0,  win_datapoints);
    //MPI_Win_lock_all(0,  win_datapoints);


#if !defined(THREAD_FUNNELED)
#if !defined(THREAD_FUNNELED)
    #pragma omp parallel for
    #pragma omp parallel for
@@ -744,7 +742,6 @@ clusters_t Heuristic1(global_context_t *ctx)
     *
     *
     * optimized v2 use a queue of center removal and then exchange them
     * optimized v2 use a queue of center removal and then exchange them
	 */
	 */
		
	heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node));
	heap_node* to_remove_mask = (heap_node*)MY_MALLOC(n*sizeof(heap_node));


    for(idx_t p = 0; p < n; ++p) 
    for(idx_t p = 0; p < n; ++p) 
@@ -752,6 +749,9 @@ clusters_t Heuristic1(global_context_t *ctx)
        to_remove_mask[p].array_idx = MY_SIZE_MAX;
        to_remove_mask[p].array_idx = MY_SIZE_MAX;
        to_remove_mask[p].value = -9999999;
        to_remove_mask[p].value = -9999999;
    }
    }

    // sort by density

    qsort(dp_info_ptrs, n, sizeof(datapoint_info_t*), cmpPP);
    qsort(dp_info_ptrs, n, sizeof(datapoint_info_t*), cmpPP);


    /**
    /**
@@ -774,6 +774,13 @@ clusters_t Heuristic1(global_context_t *ctx)
        omp_init_lock(lock_array + i);
        omp_init_lock(lock_array + i);
    }
    }


    elapsed_time = TIME_STOP;
    LOG_WRITE("Putative centers", elapsed_time);
		
    TIME_START;

    MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints);

#if !defined(THREAD_FUNNELED)
#if !defined(THREAD_FUNNELED)
    #pragma omp parallel for schedule(dynamic)
    #pragma omp parallel for schedule(dynamic)
#endif
#endif
@@ -793,9 +800,10 @@ clusters_t Heuristic1(global_context_t *ctx)
                // actually is the p-th point
                // actually is the p-th point
                int owner = foreign_owner(ctx, jidx);
                int owner = foreign_owner(ctx, jidx);
                //if local process it
                //if local process it
                idx_t jpos = jidx - ctx -> idx_start;
                if(owner == ctx -> mpi_rank)
                if(owner == ctx -> mpi_rank)
                {
                {
                    idx_t jpos = jidx - ctx -> idx_start;
                    //acquire the lock
                    omp_set_lock(lock_array + jpos);
                    omp_set_lock(lock_array + jpos);
                    if(i_point.g > to_remove_mask[jpos].value)
                    if(i_point.g > to_remove_mask[jpos].value)
                    {
                    {
@@ -815,8 +823,14 @@ clusters_t Heuristic1(global_context_t *ctx)
        }
        }
    }
    }


    MPI_Win_fence(MPI_MODE_NOPUT, win_datapoints);

    //assemble arrays into a single buffer
    //assemble arrays into a single buffer


    elapsed_time = TIME_STOP;
    LOG_WRITE("Finding centers to prune", elapsed_time);
    TIME_START;
    
    idx_t tot_removal = 0;
    idx_t tot_removal = 0;
    for(idx_t p = 0; p < n; ++p)
    for(idx_t p = 0; p < n; ++p)
    {
    {
@@ -964,7 +978,11 @@ clusters_t Heuristic1(global_context_t *ctx)


    // merge into the mask
    // merge into the mask


    #pragma omp parallel for
    elapsed_time = TIME_STOP;
    LOG_WRITE("Communicating eliminations", elapsed_time);
    TIME_START;
    
    #pragma omp parallel for schedule(dynamic)
    for(idx_t i = 0; i < tot_recv_counts; ++i)
    for(idx_t i = 0; i < tot_recv_counts; ++i)
    {
    {
        idx_t el_pos = recv_removals[i].target_id - ctx -> idx_start;
        idx_t el_pos = recv_removals[i].target_id - ctx -> idx_start;
@@ -1046,6 +1064,11 @@ clusters_t Heuristic1(global_context_t *ctx)
    free(lock_array);
    free(lock_array);
    free(recv_removals);
    free(recv_removals);


    elapsed_time = TIME_STOP;
    LOG_WRITE("Merging", elapsed_time);

    TIME_START;

    int n_centers = (int)actual_centers.count;
    int n_centers = (int)actual_centers.count;
    int tot_centers;
    int tot_centers;
    MPI_Allreduce(&n_centers, &tot_centers, 1, MPI_INT, MPI_SUM, ctx -> mpi_communicator);
    MPI_Allreduce(&n_centers, &tot_centers, 1, MPI_INT, MPI_SUM, ctx -> mpi_communicator);
@@ -1188,7 +1211,7 @@ clusters_t Heuristic1(global_context_t *ctx)


    }
    }


    MPI_Win_unlock_all(win_datapoints);
    //MPI_Win_unlock_all(win_datapoints);
    MPI_Win_fence(0, win_datapoints);
    MPI_Win_fence(0, win_datapoints);
    MPI_Win_free(&win_datapoints);
    MPI_Win_free(&win_datapoints);


@@ -1209,6 +1232,8 @@ clusters_t Heuristic1(global_context_t *ctx)
        free(ks);
        free(ks);
    #endif
    #endif


    elapsed_time = TIME_STOP;
    LOG_WRITE("Cluster assign", elapsed_time);


    free(actual_centers.data);
    free(actual_centers.data);
    actual_centers.size  = tot_centers;
    actual_centers.size  = tot_centers;