kdtree_index.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 #ifndef OPENCV_FLANN_KDTREE_INDEX_H_
32 #define OPENCV_FLANN_KDTREE_INDEX_H_
33 
34 #include <algorithm>
35 #include <map>
36 #include <cassert>
37 #include <cstring>
38 
39 #include "general.h"
40 #include "nn_index.h"
41 #include "dynamic_bitset.h"
42 #include "matrix.h"
43 #include "result_set.h"
44 #include "heap.h"
45 #include "allocator.h"
46 #include "random.h"
47 #include "saving.h"
48 
49 
50 namespace cvflann
51 {
52 
54 {
55  KDTreeIndexParams(int trees = 4)
56  {
57  (*this)["algorithm"] = FLANN_INDEX_KDTREE;
58  (*this)["trees"] = trees;
59  }
60 };
61 
62 
69 template <typename Distance>
70 class KDTreeIndex : public NNIndex<Distance>
71 {
72 public:
73  typedef typename Distance::ElementType ElementType;
74  typedef typename Distance::ResultType DistanceType;
75 
76 
85  Distance d = Distance() ) :
86  dataset_(inputData), index_params_(params), distance_(d)
87  {
88  size_ = dataset_.rows;
89  veclen_ = dataset_.cols;
90 
91  trees_ = get_param(index_params_,"trees",4);
92  tree_roots_ = new NodePtr[trees_];
93 
94  // Create a permutable array of indices to the input vectors.
95  vind_.resize(size_);
96  for (size_t i = 0; i < size_; ++i) {
97  vind_[i] = int(i);
98  }
99 
100  mean_ = new DistanceType[veclen_];
101  var_ = new DistanceType[veclen_];
102  }
103 
104 
105  KDTreeIndex(const KDTreeIndex&);
107 
112  {
113  if (tree_roots_!=NULL) {
114  delete[] tree_roots_;
115  }
116  delete[] mean_;
117  delete[] var_;
118  }
119 
123  void buildIndex()
124  {
125  /* Construct the randomized trees. */
126  for (int i = 0; i < trees_; i++) {
127  /* Randomize the order of vectors to allow for unbiased sampling. */
128  std::random_shuffle(vind_.begin(), vind_.end());
129  tree_roots_[i] = divideTree(&vind_[0], int(size_) );
130  }
131  }
132 
133 
135  {
136  return FLANN_INDEX_KDTREE;
137  }
138 
139 
140  void saveIndex(FILE* stream)
141  {
142  save_value(stream, trees_);
143  for (int i=0; i<trees_; ++i) {
144  save_tree(stream, tree_roots_[i]);
145  }
146  }
147 
148 
149 
150  void loadIndex(FILE* stream)
151  {
152  load_value(stream, trees_);
153  if (tree_roots_!=NULL) {
154  delete[] tree_roots_;
155  }
156  tree_roots_ = new NodePtr[trees_];
157  for (int i=0; i<trees_; ++i) {
158  load_tree(stream,tree_roots_[i]);
159  }
160 
161  index_params_["algorithm"] = getType();
162  index_params_["trees"] = tree_roots_;
163  }
164 
168  size_t size() const
169  {
170  return size_;
171  }
172 
176  size_t veclen() const
177  {
178  return veclen_;
179  }
180 
185  int usedMemory() const
186  {
187  return int(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory
188  }
189 
199  void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
200  {
201  int maxChecks = get_param(searchParams,"checks", 32);
202  float epsError = 1+get_param(searchParams,"eps",0.0f);
203 
204  if (maxChecks==FLANN_CHECKS_UNLIMITED) {
205  getExactNeighbors(result, vec, epsError);
206  }
207  else {
208  getNeighbors(result, vec, maxChecks, epsError);
209  }
210  }
211 
213  {
214  return index_params_;
215  }
216 
217 private:
218 
219 
220  /*--------------------- Internal Data Structures --------------------------*/
221  struct Node
222  {
226  int divfeat;
230  DistanceType divval;
234  Node* child1, * child2;
235  };
236  typedef Node* NodePtr;
237  typedef BranchStruct<NodePtr, DistanceType> BranchSt;
238  typedef BranchSt* Branch;
239 
240 
241 
242  void save_tree(FILE* stream, NodePtr tree)
243  {
244  save_value(stream, *tree);
245  if (tree->child1!=NULL) {
246  save_tree(stream, tree->child1);
247  }
248  if (tree->child2!=NULL) {
249  save_tree(stream, tree->child2);
250  }
251  }
252 
253 
254  void load_tree(FILE* stream, NodePtr& tree)
255  {
256  tree = pool_.allocate<Node>();
257  load_value(stream, *tree);
258  if (tree->child1!=NULL) {
259  load_tree(stream, tree->child1);
260  }
261  if (tree->child2!=NULL) {
262  load_tree(stream, tree->child2);
263  }
264  }
265 
266 
276  NodePtr divideTree(int* ind, int count)
277  {
278  NodePtr node = pool_.allocate<Node>(); // allocate memory
279 
280  /* If too few exemplars remain, then make this a leaf node. */
281  if ( count == 1) {
282  node->child1 = node->child2 = NULL; /* Mark as leaf node. */
283  node->divfeat = *ind; /* Store index of this vec. */
284  }
285  else {
286  int idx;
287  int cutfeat;
288  DistanceType cutval;
289  meanSplit(ind, count, idx, cutfeat, cutval);
290 
291  node->divfeat = cutfeat;
292  node->divval = cutval;
293  node->child1 = divideTree(ind, idx);
294  node->child2 = divideTree(ind+idx, count-idx);
295  }
296 
297  return node;
298  }
299 
300 
306  void meanSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval)
307  {
308  memset(mean_,0,veclen_*sizeof(DistanceType));
309  memset(var_,0,veclen_*sizeof(DistanceType));
310 
311  /* Compute mean values. Only the first SAMPLE_MEAN values need to be
312  sampled to get a good estimate.
313  */
314  int cnt = std::min((int)SAMPLE_MEAN+1, count);
315  for (int j = 0; j < cnt; ++j) {
316  ElementType* v = dataset_[ind[j]];
317  for (size_t k=0; k<veclen_; ++k) {
318  mean_[k] += v[k];
319  }
320  }
321  for (size_t k=0; k<veclen_; ++k) {
322  mean_[k] /= cnt;
323  }
324 
325  /* Compute variances (no need to divide by count). */
326  for (int j = 0; j < cnt; ++j) {
327  ElementType* v = dataset_[ind[j]];
328  for (size_t k=0; k<veclen_; ++k) {
329  DistanceType dist = v[k] - mean_[k];
330  var_[k] += dist * dist;
331  }
332  }
333  /* Select one of the highest variance indices at random. */
334  cutfeat = selectDivision(var_);
335  cutval = mean_[cutfeat];
336 
337  int lim1, lim2;
338  planeSplit(ind, count, cutfeat, cutval, lim1, lim2);
339 
340  if (lim1>count/2) index = lim1;
341  else if (lim2<count/2) index = lim2;
342  else index = count/2;
343 
344  /* If either list is empty, it means that all remaining features
345  * are identical. Split in the middle to maintain a balanced tree.
346  */
347  if ((lim1==count)||(lim2==0)) index = count/2;
348  }
349 
350 
355  int selectDivision(DistanceType* v)
356  {
357  int num = 0;
358  size_t topind[RAND_DIM];
359 
360  /* Create a list of the indices of the top RAND_DIM values. */
361  for (size_t i = 0; i < veclen_; ++i) {
362  if ((num < RAND_DIM)||(v[i] > v[topind[num-1]])) {
363  /* Put this element at end of topind. */
364  if (num < RAND_DIM) {
365  topind[num++] = i; /* Add to list. */
366  }
367  else {
368  topind[num-1] = i; /* Replace last element. */
369  }
370  /* Bubble end value down to right location by repeated swapping. */
371  int j = num - 1;
372  while (j > 0 && v[topind[j]] > v[topind[j-1]]) {
373  std::swap(topind[j], topind[j-1]);
374  --j;
375  }
376  }
377  }
378  /* Select a random integer in range [0,num-1], and return that index. */
379  int rnd = rand_int(num);
380  return (int)topind[rnd];
381  }
382 
383 
393  void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2)
394  {
395  /* Move vector indices for left subtree to front of list. */
396  int left = 0;
397  int right = count-1;
398  for (;; ) {
399  while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left;
400  while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right;
401  if (left>right) break;
402  std::swap(ind[left], ind[right]); ++left; --right;
403  }
404  lim1 = left;
405  right = count-1;
406  for (;; ) {
407  while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left;
408  while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right;
409  if (left>right) break;
410  std::swap(ind[left], ind[right]); ++left; --right;
411  }
412  lim2 = left;
413  }
414 
419  void getExactNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, float epsError)
420  {
421  // checkID -= 1; /* Set a different unique ID for each search. */
422 
423  if (trees_ > 1) {
424  fprintf(stderr,"It doesn't make any sense to use more than one tree for exact search");
425  }
426  if (trees_>0) {
427  searchLevelExact(result, vec, tree_roots_[0], 0.0, epsError);
428  }
429  assert(result.full());
430  }
431 
437  void getNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, int maxCheck, float epsError)
438  {
439  int i;
440  BranchSt branch;
441 
442  int checkCount = 0;
443  Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
444  DynamicBitset checked(size_);
445 
446  /* Search once through each tree down to root. */
447  for (i = 0; i < trees_; ++i) {
448  searchLevel(result, vec, tree_roots_[i], 0, checkCount, maxCheck, epsError, heap, checked);
449  }
450 
451  /* Keep searching other branches from heap until finished. */
452  while ( heap->popMin(branch) && (checkCount < maxCheck || !result.full() )) {
453  searchLevel(result, vec, branch.node, branch.mindist, checkCount, maxCheck, epsError, heap, checked);
454  }
455 
456  delete heap;
457 
458  assert(result.full());
459  }
460 
461 
467  void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, NodePtr node, DistanceType mindist, int& checkCount, int maxCheck,
468  float epsError, Heap<BranchSt>* heap, DynamicBitset& checked)
469  {
470  if (result_set.worstDist()<mindist) {
471  // printf("Ignoring branch, too far\n");
472  return;
473  }
474 
475  /* If this is a leaf node, then do check and return. */
476  if ((node->child1 == NULL)&&(node->child2 == NULL)) {
477  /* Do not check same node more than once when searching multiple trees.
478  Once a vector is checked, we set its location in vind to the
479  current checkID.
480  */
481  int index = node->divfeat;
482  if ( checked.test(index) || ((checkCount>=maxCheck)&& result_set.full()) ) return;
483  checked.set(index);
484  checkCount++;
485 
486  DistanceType dist = distance_(dataset_[index], vec, veclen_);
487  result_set.addPoint(dist,index);
488 
489  return;
490  }
491 
492  /* Which child branch should be taken first? */
493  ElementType val = vec[node->divfeat];
494  DistanceType diff = val - node->divval;
495  NodePtr bestChild = (diff < 0) ? node->child1 : node->child2;
496  NodePtr otherChild = (diff < 0) ? node->child2 : node->child1;
497 
498  /* Create a branch record for the branch not taken. Add distance
499  of this feature boundary (we don't attempt to correct for any
500  use of this feature in a parent node, which is unlikely to
501  happen and would have only a small effect). Don't bother
502  adding more branches to heap after halfway point, as cost of
503  adding exceeds their value.
504  */
505 
506  DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat);
507  // if (2 * checkCount < maxCheck || !result.full()) {
508  if ((new_distsq*epsError < result_set.worstDist())|| !result_set.full()) {
509  heap->insert( BranchSt(otherChild, new_distsq) );
510  }
511 
512  /* Call recursively to search next level down. */
513  searchLevel(result_set, vec, bestChild, mindist, checkCount, maxCheck, epsError, heap, checked);
514  }
515 
519  void searchLevelExact(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindist, const float epsError)
520  {
521  /* If this is a leaf node, then do check and return. */
522  if ((node->child1 == NULL)&&(node->child2 == NULL)) {
523  int index = node->divfeat;
524  DistanceType dist = distance_(dataset_[index], vec, veclen_);
525  result_set.addPoint(dist,index);
526  return;
527  }
528 
529  /* Which child branch should be taken first? */
530  ElementType val = vec[node->divfeat];
531  DistanceType diff = val - node->divval;
532  NodePtr bestChild = (diff < 0) ? node->child1 : node->child2;
533  NodePtr otherChild = (diff < 0) ? node->child2 : node->child1;
534 
535  /* Create a branch record for the branch not taken. Add distance
536  of this feature boundary (we don't attempt to correct for any
537  use of this feature in a parent node, which is unlikely to
538  happen and would have only a small effect). Don't bother
539  adding more branches to heap after halfway point, as cost of
540  adding exceeds their value.
541  */
542 
543  DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat);
544 
545  /* Call recursively to search next level down. */
546  searchLevelExact(result_set, vec, bestChild, mindist, epsError);
547 
548  if (new_distsq*epsError<=result_set.worstDist()) {
549  searchLevelExact(result_set, vec, otherChild, new_distsq, epsError);
550  }
551  }
552 
553 
554 private:
555 
    enum
    {
        /* Bound on the sample used by meanSplit(): it examines at most
           SAMPLE_MEAN+1 points when estimating the mean and variance. */
        SAMPLE_MEAN = 100,
        /* Size of the candidate list in selectDivision(): the split
           dimension is drawn from at most this many top-variance dimensions. */
        RAND_DIM=5
    };


    /* Number of trees; read from the "trees" parameter by the constructor
       and from the stream by loadIndex(). */
    int trees_;

    /* Permutable array of indices to the input vectors. */
    std::vector<int> vind_;

    /* The input data the index is built over. */
    const Matrix<ElementType> dataset_;

    /* Parameters of this index, returned by getParameters(). */
    IndexParams index_params_;

    /* Row count (size_) and column count (veclen_) of dataset_. */
    size_t size_;
    size_t veclen_;


    /* Scratch arrays of veclen_ elements used by meanSplit(). */
    DistanceType* mean_;
    DistanceType* var_;


    /* Array of trees_ root pointers, one per tree. */
    NodePtr* tree_roots_;

    /* Allocator from which all tree nodes are obtained (see divideTree()
       and load_tree()); no node is deleted individually in this class. */
    PooledAllocator pool_;

    /* Distance functor used to compare vectors. */
    Distance distance_;
615 
616 
617 }; // class KDTreeForest
618 
619 }
620 
621 #endif //OPENCV_FLANN_KDTREE_INDEX_H_
flann_algorithm_t
Definition: defines.h:81
CvFileNode * node
Definition: core_c.h:1638
T get_param(const IndexParams &params, std::string name, const T &default_value)
Definition: params.h:59
int usedMemory
Definition: allocator.h:89
Distance::ResultType DistanceType
Definition: kdtree_index.h:74
void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams)
Definition: kdtree_index.h:199
size_t cols
Definition: matrix.h:52
KDTreeIndex & operator=(const KDTreeIndex &)
int wastedMemory
Definition: allocator.h:90
const int * idx
Definition: core_c.h:323
T * allocate(size_t count=1)
Definition: allocator.h:178
int usedMemory() const
Definition: kdtree_index.h:185
GLuint index
Definition: core_c.h:986
int d
Definition: legacy.hpp:3064
KDTreeIndex(const Matrix< ElementType > &inputData, const IndexParams &params=KDTreeIndexParams(), Distance d=Distance())
Definition: kdtree_index.h:84
size_t size() const
Definition: kdtree_index.h:168
const CvArr const CvArr CvArr * result
Definition: core_c.h:805
KDTreeIndexParams(int trees=4)
Definition: kdtree_index.h:55
Definition: params.h:44
boost::dynamic_bitset DynamicBitset
Definition: dynamic_bitset.h:44
GLuint GLfloat * val
void buildIndex()
Definition: kdtree_index.h:123
size_t veclen() const
Definition: kdtree_index.h:176
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
computes per-element minimum of two arrays (dst = min(src1, src2))
const CvMat CvMat CvMat int k
Definition: legacy.hpp:3052
GLuint GLuint GLsizei count
Definition: core_c.h:973
GLdouble left
void load_value(FILE *stream, T &value, size_t count=1)
Definition: saving.h:147
const GLdouble * v
GLuint GLuint num
GLdouble GLdouble right
flann_algorithm_t getType() const
Definition: kdtree_index.h:134
Definition: result_set.h:66
std::map< std::string, any > IndexParams
Definition: params.h:42
~KDTreeIndex()
Definition: kdtree_index.h:111
void loadIndex(FILE *stream)
Loads the index from a stream.
Definition: kdtree_index.h:150
GLenum const GLfloat * params
Definition: compat.hpp:688
Distance::ElementType ElementType
Definition: kdtree_index.h:73
Definition: defines.h:84
IndexParams getParameters() const
Definition: kdtree_index.h:212
Definition: defines.h:170
Definition: kdtree_index.h:70
Definition: nn_index.h:48
size_t rows
Definition: matrix.h:51
void saveIndex(FILE *stream)
Saves the index to a stream.
Definition: kdtree_index.h:140
::max::max int
Definition: functional.hpp:324
Definition: kdtree_index.h:53
CV_EXPORTS void swap(Mat &a, Mat &b)
swaps two matrices
const CvArr * right
Definition: calib3d.hpp:353
void save_value(FILE *stream, const T &value, size_t count=1)
Definition: saving.h:126
GLclampf f
int rand_int(int high=RAND_MAX, int low=0)
Definition: random.h:72
CvPoint3D64f double * dist
Definition: legacy.hpp:556