Loading Makefile +2 −2 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O0 -g CFLAGS=-O3 -g LDFLAGS=-lm all: main obj=src/main/main.c src/tree/tree.c src/common/common.c obj=src/main/main.c src/tree/tree.c src/common/common.c src/tree/kdtreeV2.c src/tree/heap.c main: ${obj} ${CC} ${CFLAGS} ${LDFLAGS} ${obj} -o $@ Loading src/common/common.h +6 −5 Original line number Diff line number Diff line Loading @@ -49,16 +49,17 @@ struct global_context_t { size_t n_points; MPI_Comm mpi_communicator; int world_size; char processor_mame[MPI_MAX_PROCESSOR_NAME]; int __processor_name_len; int mpi_rank; size_t idx_start; size_t local_n_points; uint32_t dims; float_t* local_data; float_t* lb_box; float_t* ub_box; int world_size; int mpi_rank; int __processor_name_len; char processor_mame[MPI_MAX_PROCESSOR_NAME]; MPI_Comm mpi_communicator; }; struct pointset_t Loading src/tree/heap.c 0 → 100644 +213 −0 Original line number Diff line number Diff line #include "heap.h" void swap_heap_node(heap_node* a, heap_node* b){ heap_node tmp; memcpy(&tmp,a,sizeof(heap_node)); memcpy(a,b,sizeof(heap_node)); memcpy(b,&tmp,sizeof(heap_node)); return; } void allocate_heap(heap* H, idx_t n){ H -> data = (heap_node*)malloc(n*sizeof(heap_node)); H -> N = n; H -> count = 0; return; } void init_heap(heap* H){ for(idx_t i = 0; i < H -> N; ++i) { H -> data[i].value = 0.; H -> data[i].array_idx = MY_SIZE_MAX; } return; } void free_heap(heap * H){ free(H -> data);} void heapify_max_heap(heap* H, idx_t node){ idx_t nn = node; idx_t largest = nn; /* Found gratest between children of node and boundcheck if the node is a leaf */ while(1) { largest = (HEAP_LCH(nn) < H -> N) && (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; largest = (HEAP_RCH(nn) < H -> N) && (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; if(largest != nn) { swap_heap_node(H -> data + nn, H -> data + largest); nn = largest; } else { break; } } //if(HEAP_LCH(node) < H -> N){ // //if(H -> data[HEAP_LCH(node)].value > H -> data[largest].value ) largest = HEAP_LCH(node); // largest = (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; //} //if(HEAP_RCH(node) < H -> N){ // //if(H -> data[HEAP_RCH(node)].value > H -> data[largest].value ) largest = HEAP_RCH(node); // largest = (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; //} //if(largest == node){ // return; //} //else{ // swap_heap_node(H -> data + node, H -> data + largest); // heapify_max_heap(H, largest); //} } void set_root_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx){ H -> data[0].value = val; H -> data[0].array_idx = array_idx; heapify_max_heap(H,0); return; } void insert_max_heap_insertion_sort(heap * H,const FLOAT_TYPE val,const idx_t array_idx){ heap_node tmpNode = {.value = val, .array_idx = array_idx}; if(H -> count < H -> N) { idx_t idx = H -> count; H -> data[idx] = tmpNode; ++(H -> count); while(idx >= 1) { if(H -> data[idx].value < H -> data[idx - 1].value) { swap_heap_node((H -> data) + idx, (H -> data) + idx - 1); idx--; } else { break; } } } else { if(H -> data[H -> count - 1].value > val) { idx_t idx = H -> count - 1; H -> data[idx] = tmpNode; while(idx >= 1) { if(H -> data[idx].value < H -> data[idx - 1].value) { swap_heap_node(H -> data + idx, H -> data + idx - 1); idx--; } else { break; } } } } return; } void insert_max_heap(heap * H,const FLOAT_TYPE val,const idx_t array_idx){ int c1 = H -> count < H -> N; int c2 = (val < H -> data[0].value) && (!c1); int ctot = c1 + 2*c2; switch (ctot) { case 1: { idx_t node = H->count; ++(H -> count); H -> data[node].value = val; H -> data[node].array_idx = array_idx; /* * Push up the node through the heap */ while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) { swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); node = HEAP_PARENT(node); //if(node == 0) break; } } break; case 2: { set_root_max_heap(H,val,array_idx); } break; default: break; } /* alternative solution, left here if something breaks*/ //if(H -> count < H -> N){ // idx_t node = H->count; // ++(H -> count); // H -> data[node].value = val; // H -> data[node].array_idx = array_idx; // /* // * Push up the node through the heap // */ // while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) // { // swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); // node = HEAP_PARENT(node); // //if(node == 0) break; // } // return; //} //else if (val < H -> data[0].value) //{ // set_root_max_heap(H,val,array_idx); // return; //} //else //{ // return; //} } #ifdef USE_FLOAT32 #define EPS 5.96e-08 #else #define EPS 2.11e-16 #endif int cmp_heap_nodes(const void* a, const void* b) { const heap_node* aa = (const heap_node*)a; const heap_node* bb = (const heap_node*)b; int val = (aa -> value > bb -> value) - (aa -> value < bb -> value); //return vv; return val; } void heap_sort(heap* H){ idx_t n = H -> N; qsort(H -> data, n, sizeof(heap_node),cmp_heap_nodes); //for(idx_t i= (H -> N) - 1; i > 0; --i) //{ // swap_heap_node(H -> data, H -> data + i); // H -> N = i; // heapify_max_heap(H,0); //} //H -> N = n; } src/tree/heap.h 0 → 100644 +74 −0 Original line number Diff line number Diff line #pragma once #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <limits.h> #include <stdint.h> #define T double #define DATA_DIMS 0 #ifdef USE_FLOAT32 #define FLOAT_TYPE float #else #define FLOAT_TYPE double #endif #ifdef USE_INT32 #define MY_SIZE_MAX UINT32_MAX #define idx_t uint32_t #else #define MY_SIZE_MAX UINT64_MAX #define idx_t uint64_t #endif #define HEAP_LCH(x) (2*x + 1) #define HEAP_RCH(x) (2*x + 2) #define HEAP_PARENT(x) (x-1)/2 #define HP_LEFT_SIDE 0 #define HP_RIGHT_SIDE 1 struct heap_node { FLOAT_TYPE value; idx_t array_idx; } ; struct heap { idx_t N; idx_t count; struct heap_node* data; } ; struct SimpleHeap { idx_t N; T * data; } ; typedef struct SimpleHeap SimpleHeap; typedef struct heap heap; typedef struct heap_node heap_node; void allocate_heap(heap* H, idx_t n); void init_heap(heap* H); void free_heap(heap * H); void heapify_max_heap(heap* H, idx_t node); void set_root_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx); void insert_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx); void heap_sort(heap* H); void insert_max_heap_insertion_sort(heap * H,const FLOAT_TYPE val,const idx_t array_idx); int cmp_heap_nodes(const void* a, const void* b); src/tree/kdtreeV2.c 0 → 100644 +395 −0 Original line number Diff line number Diff line #include "kdtreeV2.h" #include "heap.h" #include <math.h> #include <stdint.h> #include <time.h> #define DEFAULT_LEAF_SIZE 30 extern unsigned int data_dims; FLOAT_TYPE eud_kdtree_v2(FLOAT_TYPE* restrict p1, FLOAT_TYPE* restrict p2){ register FLOAT_TYPE d = 0; for(unsigned int i = 0; i<data_dims; ++i){ register FLOAT_TYPE dd = (p1[i] - p2[i]); d += dd*dd; } return d; //return sqrt(d); } #ifdef USE_FLOAT32 typedef float v4f __attribute__ ((vector_size (16))); #else typedef double v4f __attribute__ ((vector_size (32))); #endif FLOAT_TYPE eud_kdtree_v2_2(FLOAT_TYPE* restrict u, FLOAT_TYPE* restrict v) { register float_t s; uint32_t i = 0; // manually unrolled loop, might be vectorized register v4f acc = {0., 0., 0., 0.}; for (; i + 4 <= data_dims; i += 4) { register v4f _u = {u[i], u[i + 1], u[i + 2], u[i + 3]}; register v4f _v = {v[i], v[i + 1], v[i + 2], v[i + 3]}; register v4f _diff = _u - _v; acc = _diff*_diff; } s = acc[0] + acc[1] + acc[2] + acc[3]; if (i < data_dims) { for(; i<data_dims; ++i) { float_t d = u[i] - v[i]; s += d * d; } } return s; } FLOAT_TYPE* swapMem_kdv2; void swap_kdnode_v2(kdnode_v2 *x, kdnode_v2 *y) { kdnode_v2 tmp; tmp = *x; *x = *y; *y = tmp; #ifdef SWMEM memcpy(swapMem_kdv2, x -> data, sizeof(FLOAT_TYPE)*data_dims); memcpy(x -> data, y -> data, sizeof(FLOAT_TYPE)*data_dims); memcpy(y -> data, swapMem_kdv2, sizeof(FLOAT_TYPE)*data_dims); FLOAT_TYPE* tmpPtr = x -> data; x -> data = y -> data; y -> data = tmpPtr; #endif } /** * * KDtree implementation * * */ void initialize_kdnodes_v2(kdnode_v2* node_array, FLOAT_TYPE* d, idx_t n ) { for(idx_t i = 0; i < n; ++i) { node_array[i].data = d + (i*data_dims); node_array[i].array_idx = i; node_array[i].lch = NULL; node_array[i].rch = NULL; node_array[i].parent = NULL; node_array[i].level = -1; node_array[i].is_leaf = 0; node_array[i].split_var = -1; node_array[i].node_list.data = NULL; node_array[i].node_list.count = 0; } } /* void initializePTRS(kdnode_v2** node_ptr_array, kdnode_v2* node_array, idx_t n ) { for(idx_t i = 0; i < n; ++i) { node_ptr_array[i] = node_array + i; } } */ int cmpKDnodesV2(kdnode_v2* a, kdnode_v2* b, int var){ FLOAT_TYPE res = a->data[var] - b->data[var]; return (res > 0); } void printKDnodeV2(kdnode_v2* node) { printf("Node %p:\n",node); printf("\t array_idx: %lu\n", (uint64_t)(node -> array_idx)); printf("\t data: "); for(unsigned int i=0; i<data_dims; ++i) printf(" %f ",node->data[i]); printf("\n"); printf("\t parent: %p\n",node -> parent); printf("\t lch: %p\n",node -> lch); printf("\t rch: %p\n",node -> rch); printf("\t level: %d\n", node -> level); printf("\t split_var: %d\n", node -> split_var); printf("\n"); } // Standard Lomuto partition function int partition_kdnode_v2(kdnode_v2* arr, int low, int high, int split_var) { kdnode_v2 pivot = arr[high]; int i = (low - 1); for (int j = low; j <= high - 1; j++) { if (!cmpKDnodesV2(arr + j,&pivot,split_var)) { i++; swap_kdnode_v2(arr + i, arr + j); } } swap_kdnode_v2(arr + i + 1, arr + high); return (i + 1); } // Implementation of QuickSelect int median_of_nodes_kdnode_v2(kdnode_v2* a, int left, int right, int split_var) { //printf("----------\n"); int k = left + ((right - left + 1)/2); if(left == right) return left; if(left == (right - 1)){ if(cmpKDnodesV2(a + left,a + right,split_var)) {swap_kdnode_v2(a + left, a + right);} return right; } while (left <= right) { // Partition a[left..right] around a pivot // and find the position of the pivot int pivotIndex = partition_kdnode_v2(a, left, right,split_var); //printf("%d %d %d %d\n",left, right, k, pivotIndex); // If pivot itself is the k-th smallest element if (pivotIndex == k) return pivotIndex; // If there are more than k-1 elements on // left of pivot, then k-th smallest must be // on left side. else if (pivotIndex > k) right = pivotIndex - 1; // Else k-th smallest is on right side. else left = pivotIndex + 1; } return -1; } kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* parent, int level) { kdnode_v2 *n = NULL; int split_var = level % data_dims; FLOAT_TYPE max_diff = -999999.; for(unsigned int v = 0; v < data_dims; ++v) { FLOAT_TYPE max_v = -9999999.; FLOAT_TYPE min_v = 9999999.; for(int i = start; i <= end; ++i) { max_v = t[i].data[v] > max_v ? t[i].data[v] : max_v; min_v = t[i].data[v] < min_v ? t[i].data[v] : min_v; } if((max_v - min_v) > max_diff) { max_diff = max_v - min_v; split_var = v; } } #ifdef SWMEM if(parent == NULL) { swapMem_kdv2 = (FLOAT_TYPE*)malloc(sizeof(FLOAT_TYPE)*data_dims); } #endif if(end - start < DEFAULT_LEAF_SIZE) { n = t + start; n -> is_leaf = 1; n -> parent = parent; n -> lch = NULL; n -> rch = NULL; size_t j = 0; n -> node_list.count = (size_t)(end - start + 1); n -> node_list.data = (kdnode_v2**)malloc(n -> node_list.count * sizeof(kdnode_v2*)); for(int i = start; i <= end; ++i){ t[i].parent = n; t[i].is_leaf = 1; t[i].lch = NULL; t[i].rch = NULL; n -> node_list.data[j] = t + i; ++j; } return n; } int median_idx = -1; //if ((end - start) < 0) return 0; //if (end == start) { // n = t + start; // n -> split_var = split_var; // n->parent = parent; // n->level = level; // n -> lch = NULL; // n -> rch = NULL; // return n; //} median_idx = median_of_nodes_kdnode_v2(t, start, end, split_var); //printf("%d median idx\n", median_idx); if(median_idx > -1){ swap_kdnode_v2(t + start, t + median_idx); //n = t + median_idx; n = t + start; //n->lch = make_tree_kdnode_v2(t, start, median_idx - 1, n, level + 1); n->lch = make_tree_kdnode_v2(t, start + 1, median_idx, n, level + 1); //n->rch = make_tree_kdnode_v2(t, median_idx + 1, end, n, level + 1); n->rch = make_tree_kdnode_v2(t, median_idx + 1, end, n, level + 1); n -> split_var = split_var; n->parent = parent; n->level = level; } #ifdef SWMEM if(parent == NULL) { swapMem_kdv2 = malloc(sizeof(FLOAT_TYPE)*data_dims); } #endif return n; } static inline FLOAT_TYPE hyper_plane_dist(FLOAT_TYPE* p1, FLOAT_TYPE* p2, int var) { return p1[var] - p2[var]; } static inline int hyper_plane_side(FLOAT_TYPE* p1, FLOAT_TYPE* p2, int var) { return p1[var] > p2[var]; } void knn_sub_tree_search_kdtree_v2(FLOAT_TYPE* point, kdnode_v2* root, heap * H) { if(root -> is_leaf) { for(size_t i = 0; i < root -> node_list.count; ++i) { kdnode_v2* n = root -> node_list.data[i]; __builtin_prefetch(root -> node_list.data + i + 1, 0, 3); FLOAT_TYPE distance = eud_kdtree_v2(point, n -> data); insert_max_heap(H, distance,n -> array_idx); } return; } FLOAT_TYPE current_distance = eud_kdtree_v2(point, root -> data); FLOAT_TYPE hp_distance = hyper_plane_dist(point, root -> data, root -> split_var); insert_max_heap(H, current_distance, root -> array_idx); __builtin_prefetch(root -> lch, 0, 3); __builtin_prefetch(root -> rch, 0, 3); int side = hp_distance > 0.f; switch (side) { case HP_LEFT_SIDE: if(root -> lch) { knn_sub_tree_search_kdtree_v2(point, root -> lch, H); } break; case HP_RIGHT_SIDE: if(root -> rch) { knn_sub_tree_search_kdtree_v2(point, root -> rch, H); } break; default: break; } FLOAT_TYPE max_d = H -> data[0].value; int c = max_d > (hp_distance * hp_distance); //if(c || (H -> count) < (H -> N)) if(c) { switch (side) { case HP_LEFT_SIDE: if(root -> rch) { knn_sub_tree_search_kdtree_v2(point, root -> rch, H); } break; case HP_RIGHT_SIDE: if(root -> lch) { knn_sub_tree_search_kdtree_v2(point, root -> lch, H); } break; default: break; } } return; } void kdtree_v2_init(kdtree_v2* tree, FLOAT_TYPE* data, size_t n_nodes, unsigned int dims) { data_dims = dims; tree -> n_nodes = n_nodes; tree -> _nodes = (kdnode_v2*)malloc(n_nodes * sizeof(kdnode_v2)); initialize_kdnodes_v2(tree -> _nodes, data, n_nodes); tree -> root = NULL; } void kdtree_v2_free(kdtree_v2* tree) { free(tree -> _nodes); } heap knn_kdtree_v2(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk) { heap H; allocate_heap(&H,maxk); init_heap(&H); knn_sub_tree_search_kdtree_v2(point, kdtree_root,&H); heap_sort(&H); return H; } heap knn_kdtree_v2_no_heapsort(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk) { heap H; allocate_heap(&H,maxk); init_heap(&H); knn_sub_tree_search_kdtree_v2(point, kdtree_root,&H); return H; } kdnode_v2 * build_tree_kdtree_v2(kdnode_v2* kd_ptrs, size_t n, size_t dimensions ) { /************************************************* * Wrapper for make_tree function. * * Simplifies interfaces and takes time measures * *************************************************/ data_dims = dimensions; kdnode_v2* root = make_tree_kdnode_v2(kd_ptrs, 0, n-1, NULL ,0); return root; } Loading
Makefile +2 −2 Original line number Diff line number Diff line CC=mpicc CFLAGS=-O0 -g CFLAGS=-O3 -g LDFLAGS=-lm all: main obj=src/main/main.c src/tree/tree.c src/common/common.c obj=src/main/main.c src/tree/tree.c src/common/common.c src/tree/kdtreeV2.c src/tree/heap.c main: ${obj} ${CC} ${CFLAGS} ${LDFLAGS} ${obj} -o $@ Loading
src/common/common.h +6 −5 Original line number Diff line number Diff line Loading @@ -49,16 +49,17 @@ struct global_context_t { size_t n_points; MPI_Comm mpi_communicator; int world_size; char processor_mame[MPI_MAX_PROCESSOR_NAME]; int __processor_name_len; int mpi_rank; size_t idx_start; size_t local_n_points; uint32_t dims; float_t* local_data; float_t* lb_box; float_t* ub_box; int world_size; int mpi_rank; int __processor_name_len; char processor_mame[MPI_MAX_PROCESSOR_NAME]; MPI_Comm mpi_communicator; }; struct pointset_t Loading
src/tree/heap.c 0 → 100644 +213 −0 Original line number Diff line number Diff line #include "heap.h" void swap_heap_node(heap_node* a, heap_node* b){ heap_node tmp; memcpy(&tmp,a,sizeof(heap_node)); memcpy(a,b,sizeof(heap_node)); memcpy(b,&tmp,sizeof(heap_node)); return; } void allocate_heap(heap* H, idx_t n){ H -> data = (heap_node*)malloc(n*sizeof(heap_node)); H -> N = n; H -> count = 0; return; } void init_heap(heap* H){ for(idx_t i = 0; i < H -> N; ++i) { H -> data[i].value = 0.; H -> data[i].array_idx = MY_SIZE_MAX; } return; } void free_heap(heap * H){ free(H -> data);} void heapify_max_heap(heap* H, idx_t node){ idx_t nn = node; idx_t largest = nn; /* Found gratest between children of node and boundcheck if the node is a leaf */ while(1) { largest = (HEAP_LCH(nn) < H -> N) && (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; largest = (HEAP_RCH(nn) < H -> N) && (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; if(largest != nn) { swap_heap_node(H -> data + nn, H -> data + largest); nn = largest; } else { break; } } //if(HEAP_LCH(node) < H -> N){ // //if(H -> data[HEAP_LCH(node)].value > H -> data[largest].value ) largest = HEAP_LCH(node); // largest = (H -> data[HEAP_LCH(nn)].value > H -> data[largest].value ) ? HEAP_LCH(nn) : largest; //} //if(HEAP_RCH(node) < H -> N){ // //if(H -> data[HEAP_RCH(node)].value > H -> data[largest].value ) largest = HEAP_RCH(node); // largest = (H -> data[HEAP_RCH(nn)].value > H -> data[largest].value ) ? HEAP_RCH(nn) : largest; //} //if(largest == node){ // return; //} //else{ // swap_heap_node(H -> data + node, H -> data + largest); // heapify_max_heap(H, largest); //} } void set_root_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx){ H -> data[0].value = val; H -> data[0].array_idx = array_idx; heapify_max_heap(H,0); return; } void insert_max_heap_insertion_sort(heap * H,const FLOAT_TYPE val,const idx_t array_idx){ heap_node tmpNode = {.value = val, .array_idx = array_idx}; if(H -> count < H -> N) { idx_t idx = H -> count; H -> data[idx] = tmpNode; ++(H -> count); while(idx >= 1) { if(H -> data[idx].value < H -> data[idx - 1].value) { swap_heap_node((H -> data) + idx, (H -> data) + idx - 1); idx--; } else { break; } } } else { if(H -> data[H -> count - 1].value > val) { idx_t idx = H -> count - 1; H -> data[idx] = tmpNode; while(idx >= 1) { if(H -> data[idx].value < H -> data[idx - 1].value) { swap_heap_node(H -> data + idx, H -> data + idx - 1); idx--; } else { break; } } } } return; } void insert_max_heap(heap * H,const FLOAT_TYPE val,const idx_t array_idx){ int c1 = H -> count < H -> N; int c2 = (val < H -> data[0].value) && (!c1); int ctot = c1 + 2*c2; switch (ctot) { case 1: { idx_t node = H->count; ++(H -> count); H -> data[node].value = val; H -> data[node].array_idx = array_idx; /* * Push up the node through the heap */ while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) { swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); node = HEAP_PARENT(node); //if(node == 0) break; } } break; case 2: { set_root_max_heap(H,val,array_idx); } break; default: break; } /* alternative solution, left here if something breaks*/ //if(H -> count < H -> N){ // idx_t node = H->count; // ++(H -> count); // H -> data[node].value = val; // H -> data[node].array_idx = array_idx; // /* // * Push up the node through the heap // */ // while(node && H -> data[node].value > H -> data[HEAP_PARENT(node)].value) // { // swap_heap_node(H -> data + node, H -> data + HEAP_PARENT(node)); // node = HEAP_PARENT(node); // //if(node == 0) break; // } // return; //} //else if (val < H -> data[0].value) //{ // set_root_max_heap(H,val,array_idx); // return; //} //else //{ // return; //} } #ifdef USE_FLOAT32 #define EPS 5.96e-08 #else #define EPS 2.11e-16 #endif int cmp_heap_nodes(const void* a, const void* b) { const heap_node* aa = (const heap_node*)a; const heap_node* bb = (const heap_node*)b; int val = (aa -> value > bb -> value) - (aa -> value < bb -> value); //return vv; return val; } void heap_sort(heap* H){ idx_t n = H -> N; qsort(H -> data, n, sizeof(heap_node),cmp_heap_nodes); //for(idx_t i= (H -> N) - 1; i > 0; --i) //{ // swap_heap_node(H -> data, H -> data + i); // H -> N = i; // heapify_max_heap(H,0); //} //H -> N = n; }
src/tree/heap.h 0 → 100644 +74 −0 Original line number Diff line number Diff line #pragma once #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <limits.h> #include <stdint.h> #define T double #define DATA_DIMS 0 #ifdef USE_FLOAT32 #define FLOAT_TYPE float #else #define FLOAT_TYPE double #endif #ifdef USE_INT32 #define MY_SIZE_MAX UINT32_MAX #define idx_t uint32_t #else #define MY_SIZE_MAX UINT64_MAX #define idx_t uint64_t #endif #define HEAP_LCH(x) (2*x + 1) #define HEAP_RCH(x) (2*x + 2) #define HEAP_PARENT(x) (x-1)/2 #define HP_LEFT_SIDE 0 #define HP_RIGHT_SIDE 1 struct heap_node { FLOAT_TYPE value; idx_t array_idx; } ; struct heap { idx_t N; idx_t count; struct heap_node* data; } ; struct SimpleHeap { idx_t N; T * data; } ; typedef struct SimpleHeap SimpleHeap; typedef struct heap heap; typedef struct heap_node heap_node; void allocate_heap(heap* H, idx_t n); void init_heap(heap* H); void free_heap(heap * H); void heapify_max_heap(heap* H, idx_t node); void set_root_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx); void insert_max_heap(heap * H, FLOAT_TYPE val, idx_t array_idx); void heap_sort(heap* H); void insert_max_heap_insertion_sort(heap * H,const FLOAT_TYPE val,const idx_t array_idx); int cmp_heap_nodes(const void* a, const void* b);
src/tree/kdtreeV2.c 0 → 100644 +395 −0 Original line number Diff line number Diff line #include "kdtreeV2.h" #include "heap.h" #include <math.h> #include <stdint.h> #include <time.h> #define DEFAULT_LEAF_SIZE 30 extern unsigned int data_dims; FLOAT_TYPE eud_kdtree_v2(FLOAT_TYPE* restrict p1, FLOAT_TYPE* restrict p2){ register FLOAT_TYPE d = 0; for(unsigned int i = 0; i<data_dims; ++i){ register FLOAT_TYPE dd = (p1[i] - p2[i]); d += dd*dd; } return d; //return sqrt(d); } #ifdef USE_FLOAT32 typedef float v4f __attribute__ ((vector_size (16))); #else typedef double v4f __attribute__ ((vector_size (32))); #endif FLOAT_TYPE eud_kdtree_v2_2(FLOAT_TYPE* restrict u, FLOAT_TYPE* restrict v) { register float_t s; uint32_t i = 0; // manually unrolled loop, might be vectorized register v4f acc = {0., 0., 0., 0.}; for (; i + 4 <= data_dims; i += 4) { register v4f _u = {u[i], u[i + 1], u[i + 2], u[i + 3]}; register v4f _v = {v[i], v[i + 1], v[i + 2], v[i + 3]}; register v4f _diff = _u - _v; acc = _diff*_diff; } s = acc[0] + acc[1] + acc[2] + acc[3]; if (i < data_dims) { for(; i<data_dims; ++i) { float_t d = u[i] - v[i]; s += d * d; } } return s; } FLOAT_TYPE* swapMem_kdv2; void swap_kdnode_v2(kdnode_v2 *x, kdnode_v2 *y) { kdnode_v2 tmp; tmp = *x; *x = *y; *y = tmp; #ifdef SWMEM memcpy(swapMem_kdv2, x -> data, sizeof(FLOAT_TYPE)*data_dims); memcpy(x -> data, y -> data, sizeof(FLOAT_TYPE)*data_dims); memcpy(y -> data, swapMem_kdv2, sizeof(FLOAT_TYPE)*data_dims); FLOAT_TYPE* tmpPtr = x -> data; x -> data = y -> data; y -> data = tmpPtr; #endif } /** * * KDtree implementation * * */ void initialize_kdnodes_v2(kdnode_v2* node_array, FLOAT_TYPE* d, idx_t n ) { for(idx_t i = 0; i < n; ++i) { node_array[i].data = d + (i*data_dims); node_array[i].array_idx = i; node_array[i].lch = NULL; node_array[i].rch = NULL; node_array[i].parent = NULL; node_array[i].level = -1; node_array[i].is_leaf = 0; node_array[i].split_var = -1; node_array[i].node_list.data = NULL; node_array[i].node_list.count = 0; } } /* void initializePTRS(kdnode_v2** node_ptr_array, kdnode_v2* node_array, idx_t n ) { for(idx_t i = 0; i < n; ++i) { node_ptr_array[i] = node_array + i; } } */ int cmpKDnodesV2(kdnode_v2* a, kdnode_v2* b, int var){ FLOAT_TYPE res = a->data[var] - b->data[var]; return (res > 0); } void printKDnodeV2(kdnode_v2* node) { printf("Node %p:\n",node); printf("\t array_idx: %lu\n", (uint64_t)(node -> array_idx)); printf("\t data: "); for(unsigned int i=0; i<data_dims; ++i) printf(" %f ",node->data[i]); printf("\n"); printf("\t parent: %p\n",node -> parent); printf("\t lch: %p\n",node -> lch); printf("\t rch: %p\n",node -> rch); printf("\t level: %d\n", node -> level); printf("\t split_var: %d\n", node -> split_var); printf("\n"); } // Standard Lomuto partition function int partition_kdnode_v2(kdnode_v2* arr, int low, int high, int split_var) { kdnode_v2 pivot = arr[high]; int i = (low - 1); for (int j = low; j <= high - 1; j++) { if (!cmpKDnodesV2(arr + j,&pivot,split_var)) { i++; swap_kdnode_v2(arr + i, arr + j); } } swap_kdnode_v2(arr + i + 1, arr + high); return (i + 1); } // Implementation of QuickSelect int median_of_nodes_kdnode_v2(kdnode_v2* a, int left, int right, int split_var) { //printf("----------\n"); int k = left + ((right - left + 1)/2); if(left == right) return left; if(left == (right - 1)){ if(cmpKDnodesV2(a + left,a + right,split_var)) {swap_kdnode_v2(a + left, a + right);} return right; } while (left <= right) { // Partition a[left..right] around a pivot // and find the position of the pivot int pivotIndex = partition_kdnode_v2(a, left, right,split_var); //printf("%d %d %d %d\n",left, right, k, pivotIndex); // If pivot itself is the k-th smallest element if (pivotIndex == k) return pivotIndex; // If there are more than k-1 elements on // left of pivot, then k-th smallest must be // on left side. else if (pivotIndex > k) right = pivotIndex - 1; // Else k-th smallest is on right side. else left = pivotIndex + 1; } return -1; } kdnode_v2* make_tree_kdnode_v2(kdnode_v2* t, int start, int end, kdnode_v2* parent, int level) { kdnode_v2 *n = NULL; int split_var = level % data_dims; FLOAT_TYPE max_diff = -999999.; for(unsigned int v = 0; v < data_dims; ++v) { FLOAT_TYPE max_v = -9999999.; FLOAT_TYPE min_v = 9999999.; for(int i = start; i <= end; ++i) { max_v = t[i].data[v] > max_v ? t[i].data[v] : max_v; min_v = t[i].data[v] < min_v ? t[i].data[v] : min_v; } if((max_v - min_v) > max_diff) { max_diff = max_v - min_v; split_var = v; } } #ifdef SWMEM if(parent == NULL) { swapMem_kdv2 = (FLOAT_TYPE*)malloc(sizeof(FLOAT_TYPE)*data_dims); } #endif if(end - start < DEFAULT_LEAF_SIZE) { n = t + start; n -> is_leaf = 1; n -> parent = parent; n -> lch = NULL; n -> rch = NULL; size_t j = 0; n -> node_list.count = (size_t)(end - start + 1); n -> node_list.data = (kdnode_v2**)malloc(n -> node_list.count * sizeof(kdnode_v2*)); for(int i = start; i <= end; ++i){ t[i].parent = n; t[i].is_leaf = 1; t[i].lch = NULL; t[i].rch = NULL; n -> node_list.data[j] = t + i; ++j; } return n; } int median_idx = -1; //if ((end - start) < 0) return 0; //if (end == start) { // n = t + start; // n -> split_var = split_var; // n->parent = parent; // n->level = level; // n -> lch = NULL; // n -> rch = NULL; // return n; //} median_idx = median_of_nodes_kdnode_v2(t, start, end, split_var); //printf("%d median idx\n", median_idx); if(median_idx > -1){ swap_kdnode_v2(t + start, t + median_idx); //n = t + median_idx; n = t + start; //n->lch = make_tree_kdnode_v2(t, start, median_idx - 1, n, level + 1); n->lch = make_tree_kdnode_v2(t, start + 1, median_idx, n, level + 1); //n->rch = make_tree_kdnode_v2(t, median_idx + 1, end, n, level + 1); n->rch = make_tree_kdnode_v2(t, median_idx + 1, end, n, level + 1); n -> split_var = split_var; n->parent = parent; n->level = level; } #ifdef SWMEM if(parent == NULL) { swapMem_kdv2 = malloc(sizeof(FLOAT_TYPE)*data_dims); } #endif return n; } static inline FLOAT_TYPE hyper_plane_dist(FLOAT_TYPE* p1, FLOAT_TYPE* p2, int var) { return p1[var] - p2[var]; } static inline int hyper_plane_side(FLOAT_TYPE* p1, FLOAT_TYPE* p2, int var) { return p1[var] > p2[var]; } void knn_sub_tree_search_kdtree_v2(FLOAT_TYPE* point, kdnode_v2* root, heap * H) { if(root -> is_leaf) { for(size_t i = 0; i < root -> node_list.count; ++i) { kdnode_v2* n = root -> node_list.data[i]; __builtin_prefetch(root -> node_list.data + i + 1, 0, 3); FLOAT_TYPE distance = eud_kdtree_v2(point, n -> data); insert_max_heap(H, distance,n -> array_idx); } return; } FLOAT_TYPE current_distance = eud_kdtree_v2(point, root -> data); FLOAT_TYPE hp_distance = hyper_plane_dist(point, root -> data, root -> split_var); insert_max_heap(H, current_distance, root -> array_idx); __builtin_prefetch(root -> lch, 0, 3); __builtin_prefetch(root -> rch, 0, 3); int side = hp_distance > 0.f; switch (side) { case HP_LEFT_SIDE: if(root -> lch) { knn_sub_tree_search_kdtree_v2(point, root -> lch, H); } break; case HP_RIGHT_SIDE: if(root -> rch) { knn_sub_tree_search_kdtree_v2(point, root -> rch, H); } break; default: break; } FLOAT_TYPE max_d = H -> data[0].value; int c = max_d > (hp_distance * hp_distance); //if(c || (H -> count) < (H -> N)) if(c) { switch (side) { case HP_LEFT_SIDE: if(root -> rch) { knn_sub_tree_search_kdtree_v2(point, root -> rch, H); } break; case HP_RIGHT_SIDE: if(root -> lch) { knn_sub_tree_search_kdtree_v2(point, root -> lch, H); } break; default: break; } } return; } void kdtree_v2_init(kdtree_v2* tree, FLOAT_TYPE* data, size_t n_nodes, unsigned int dims) { data_dims = dims; tree -> n_nodes = n_nodes; tree -> _nodes = (kdnode_v2*)malloc(n_nodes * sizeof(kdnode_v2)); initialize_kdnodes_v2(tree -> _nodes, data, n_nodes); tree -> root = NULL; } void kdtree_v2_free(kdtree_v2* tree) { free(tree -> _nodes); } heap knn_kdtree_v2(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk) { heap H; allocate_heap(&H,maxk); init_heap(&H); knn_sub_tree_search_kdtree_v2(point, kdtree_root,&H); heap_sort(&H); return H; } heap knn_kdtree_v2_no_heapsort(FLOAT_TYPE* point, kdnode_v2* kdtree_root, int maxk) { heap H; allocate_heap(&H,maxk); init_heap(&H); knn_sub_tree_search_kdtree_v2(point, kdtree_root,&H); return H; } kdnode_v2 * build_tree_kdtree_v2(kdnode_v2* kd_ptrs, size_t n, size_t dimensions ) { /************************************************* * Wrapper for make_tree function. * * Simplifies interfaces and takes time measures * *************************************************/ data_dims = dimensions; kdnode_v2* root = make_tree_kdnode_v2(kd_ptrs, 0, n-1, NULL ,0); return root; }