kdtree_single_index.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 #ifndef OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
32 #define OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
33 
34 #include <algorithm>
35 #include <map>
36 #include <cassert>
37 #include <cstring>
38 
39 #include "general.h"
40 #include "nn_index.h"
41 #include "matrix.h"
42 #include "result_set.h"
43 #include "heap.h"
44 #include "allocator.h"
45 #include "random.h"
46 #include "saving.h"
47 
48 namespace cvflann
49 {
50 
52 {
53  KDTreeSingleIndexParams(int leaf_max_size = 10, bool reorder = true, int dim = -1)
54  {
55  (*this)["algorithm"] = FLANN_INDEX_KDTREE_SINGLE;
56  (*this)["leaf_max_size"] = leaf_max_size;
57  (*this)["reorder"] = reorder;
58  (*this)["dim"] = dim;
59  }
60 };
61 
62 
69 template <typename Distance>
70 class KDTreeSingleIndex : public NNIndex<Distance>
71 {
72 public:
73  typedef typename Distance::ElementType ElementType;
74  typedef typename Distance::ResultType DistanceType;
75 
76 
85  Distance d = Distance() ) :
86  dataset_(inputData), index_params_(params), distance_(d)
87  {
88  size_ = dataset_.rows;
89  dim_ = dataset_.cols;
90  int dim_param = get_param(params,"dim",-1);
91  if (dim_param>0) dim_ = dim_param;
92  leaf_max_size_ = get_param(params,"leaf_max_size",10);
93  reorder_ = get_param(params,"reorder",true);
94 
95  // Create a permutable array of indices to the input vectors.
96  vind_.resize(size_);
97  for (size_t i = 0; i < size_; i++) {
98  vind_[i] = (int)i;
99  }
100  }
101 
104 
109  {
110  if (reorder_) delete[] data_.data;
111  }
112 
116  void buildIndex()
117  {
118  computeBoundingBox(root_bbox_);
119  root_node_ = divideTree(0, (int)size_, root_bbox_ ); // construct the tree
120 
121  if (reorder_) {
122  delete[] data_.data;
123  data_ = cvflann::Matrix<ElementType>(new ElementType[size_*dim_], size_, dim_);
124  for (size_t i=0; i<size_; ++i) {
125  for (size_t j=0; j<dim_; ++j) {
126  data_[i][j] = dataset_[vind_[i]][j];
127  }
128  }
129  }
130  else {
131  data_ = dataset_;
132  }
133  }
134 
136  {
138  }
139 
140 
141  void saveIndex(FILE* stream)
142  {
143  save_value(stream, size_);
144  save_value(stream, dim_);
145  save_value(stream, root_bbox_);
146  save_value(stream, reorder_);
147  save_value(stream, leaf_max_size_);
148  save_value(stream, vind_);
149  if (reorder_) {
150  save_value(stream, data_);
151  }
152  save_tree(stream, root_node_);
153  }
154 
155 
156  void loadIndex(FILE* stream)
157  {
158  load_value(stream, size_);
159  load_value(stream, dim_);
160  load_value(stream, root_bbox_);
161  load_value(stream, reorder_);
162  load_value(stream, leaf_max_size_);
163  load_value(stream, vind_);
164  if (reorder_) {
165  load_value(stream, data_);
166  }
167  else {
168  data_ = dataset_;
169  }
170  load_tree(stream, root_node_);
171 
172 
173  index_params_["algorithm"] = getType();
174  index_params_["leaf_max_size"] = leaf_max_size_;
175  index_params_["reorder"] = reorder_;
176  }
177 
181  size_t size() const
182  {
183  return size_;
184  }
185 
189  size_t veclen() const
190  {
191  return dim_;
192  }
193 
198  int usedMemory() const
199  {
200  return (int)(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory
201  }
202 
203 
213  {
214  assert(queries.cols == veclen());
215  assert(indices.rows >= queries.rows);
216  assert(dists.rows >= queries.rows);
217  assert(int(indices.cols) >= knn);
218  assert(int(dists.cols) >= knn);
219 
220  KNNSimpleResultSet<DistanceType> resultSet(knn);
221  for (size_t i = 0; i < queries.rows; i++) {
222  resultSet.init(indices[i], dists[i]);
223  findNeighbors(resultSet, queries[i], params);
224  }
225  }
226 
228  {
229  return index_params_;
230  }
231 
241  void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
242  {
243  float epsError = 1+get_param(searchParams,"eps",0.0f);
244 
245  std::vector<DistanceType> dists(dim_,0);
246  DistanceType distsq = computeInitialDistances(vec, dists);
247  searchLevel(result, vec, root_node_, distsq, dists, epsError);
248  }
249 
250 private:
251 
252 
253  /*--------------------- Internal Data Structures --------------------------*/
254  struct Node
255  {
259  int left, right;
263  int divfeat;
267  DistanceType divlow, divhigh;
271  Node* child1, * child2;
272  };
273  typedef Node* NodePtr;
274 
275 
276  struct Interval
277  {
278  DistanceType low, high;
279  };
280 
281  typedef std::vector<Interval> BoundingBox;
282 
283  typedef BranchStruct<NodePtr, DistanceType> BranchSt;
284  typedef BranchSt* Branch;
285 
286 
287 
288 
289  void save_tree(FILE* stream, NodePtr tree)
290  {
291  save_value(stream, *tree);
292  if (tree->child1!=NULL) {
293  save_tree(stream, tree->child1);
294  }
295  if (tree->child2!=NULL) {
296  save_tree(stream, tree->child2);
297  }
298  }
299 
300 
301  void load_tree(FILE* stream, NodePtr& tree)
302  {
303  tree = pool_.allocate<Node>();
304  load_value(stream, *tree);
305  if (tree->child1!=NULL) {
306  load_tree(stream, tree->child1);
307  }
308  if (tree->child2!=NULL) {
309  load_tree(stream, tree->child2);
310  }
311  }
312 
313 
314  void computeBoundingBox(BoundingBox& bbox)
315  {
316  bbox.resize(dim_);
317  for (size_t i=0; i<dim_; ++i) {
318  bbox[i].low = (DistanceType)dataset_[0][i];
319  bbox[i].high = (DistanceType)dataset_[0][i];
320  }
321  for (size_t k=1; k<dataset_.rows; ++k) {
322  for (size_t i=0; i<dim_; ++i) {
323  if (dataset_[k][i]<bbox[i].low) bbox[i].low = (DistanceType)dataset_[k][i];
324  if (dataset_[k][i]>bbox[i].high) bbox[i].high = (DistanceType)dataset_[k][i];
325  }
326  }
327  }
328 
329 
339  NodePtr divideTree(int left, int right, BoundingBox& bbox)
340  {
341  NodePtr node = pool_.allocate<Node>(); // allocate memory
342 
343  /* If too few exemplars remain, then make this a leaf node. */
344  if ( (right-left) <= leaf_max_size_) {
345  node->child1 = node->child2 = NULL; /* Mark as leaf node. */
346  node->left = left;
347  node->right = right;
348 
349  // compute bounding-box of leaf points
350  for (size_t i=0; i<dim_; ++i) {
351  bbox[i].low = (DistanceType)dataset_[vind_[left]][i];
352  bbox[i].high = (DistanceType)dataset_[vind_[left]][i];
353  }
354  for (int k=left+1; k<right; ++k) {
355  for (size_t i=0; i<dim_; ++i) {
356  if (bbox[i].low>dataset_[vind_[k]][i]) bbox[i].low=(DistanceType)dataset_[vind_[k]][i];
357  if (bbox[i].high<dataset_[vind_[k]][i]) bbox[i].high=(DistanceType)dataset_[vind_[k]][i];
358  }
359  }
360  }
361  else {
362  int idx;
363  int cutfeat;
364  DistanceType cutval;
365  middleSplit_(&vind_[0]+left, right-left, idx, cutfeat, cutval, bbox);
366 
367  node->divfeat = cutfeat;
368 
369  BoundingBox left_bbox(bbox);
370  left_bbox[cutfeat].high = cutval;
371  node->child1 = divideTree(left, left+idx, left_bbox);
372 
373  BoundingBox right_bbox(bbox);
374  right_bbox[cutfeat].low = cutval;
375  node->child2 = divideTree(left+idx, right, right_bbox);
376 
377  node->divlow = left_bbox[cutfeat].high;
378  node->divhigh = right_bbox[cutfeat].low;
379 
380  for (size_t i=0; i<dim_; ++i) {
381  bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low);
382  bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high);
383  }
384  }
385 
386  return node;
387  }
388 
389  void computeMinMax(int* ind, int count, int dim, ElementType& min_elem, ElementType& max_elem)
390  {
391  min_elem = dataset_[ind[0]][dim];
392  max_elem = dataset_[ind[0]][dim];
393  for (int i=1; i<count; ++i) {
394  ElementType val = dataset_[ind[i]][dim];
395  if (val<min_elem) min_elem = val;
396  if (val>max_elem) max_elem = val;
397  }
398  }
399 
400  void middleSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
401  {
402  // find the largest span from the approximate bounding box
403  ElementType max_span = bbox[0].high-bbox[0].low;
404  cutfeat = 0;
405  cutval = (bbox[0].high+bbox[0].low)/2;
406  for (size_t i=1; i<dim_; ++i) {
407  ElementType span = bbox[i].high-bbox[i].low;
408  if (span>max_span) {
409  max_span = span;
410  cutfeat = i;
411  cutval = (bbox[i].high+bbox[i].low)/2;
412  }
413  }
414 
415  // compute exact span on the found dimension
416  ElementType min_elem, max_elem;
417  computeMinMax(ind, count, cutfeat, min_elem, max_elem);
418  cutval = (min_elem+max_elem)/2;
419  max_span = max_elem - min_elem;
420 
421  // check if a dimension of a largest span exists
422  size_t k = cutfeat;
423  for (size_t i=0; i<dim_; ++i) {
424  if (i==k) continue;
425  ElementType span = bbox[i].high-bbox[i].low;
426  if (span>max_span) {
427  computeMinMax(ind, count, i, min_elem, max_elem);
428  span = max_elem - min_elem;
429  if (span>max_span) {
430  max_span = span;
431  cutfeat = i;
432  cutval = (min_elem+max_elem)/2;
433  }
434  }
435  }
436  int lim1, lim2;
437  planeSplit(ind, count, cutfeat, cutval, lim1, lim2);
438 
439  if (lim1>count/2) index = lim1;
440  else if (lim2<count/2) index = lim2;
441  else index = count/2;
442  }
443 
444 
445  void middleSplit_(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
446  {
447  const float EPS=0.00001f;
448  DistanceType max_span = bbox[0].high-bbox[0].low;
449  for (size_t i=1; i<dim_; ++i) {
450  DistanceType span = bbox[i].high-bbox[i].low;
451  if (span>max_span) {
452  max_span = span;
453  }
454  }
455  DistanceType max_spread = -1;
456  cutfeat = 0;
457  for (size_t i=0; i<dim_; ++i) {
458  DistanceType span = bbox[i].high-bbox[i].low;
459  if (span>(DistanceType)((1-EPS)*max_span)) {
460  ElementType min_elem, max_elem;
461  computeMinMax(ind, count, cutfeat, min_elem, max_elem);
462  DistanceType spread = (DistanceType)(max_elem-min_elem);
463  if (spread>max_spread) {
464  cutfeat = (int)i;
465  max_spread = spread;
466  }
467  }
468  }
469  // split in the middle
470  DistanceType split_val = (bbox[cutfeat].low+bbox[cutfeat].high)/2;
471  ElementType min_elem, max_elem;
472  computeMinMax(ind, count, cutfeat, min_elem, max_elem);
473 
474  if (split_val<min_elem) cutval = (DistanceType)min_elem;
475  else if (split_val>max_elem) cutval = (DistanceType)max_elem;
476  else cutval = split_val;
477 
478  int lim1, lim2;
479  planeSplit(ind, count, cutfeat, cutval, lim1, lim2);
480 
481  if (lim1>count/2) index = lim1;
482  else if (lim2<count/2) index = lim2;
483  else index = count/2;
484  }
485 
486 
496  void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2)
497  {
498  /* Move vector indices for left subtree to front of list. */
499  int left = 0;
500  int right = count-1;
501  for (;; ) {
502  while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left;
503  while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right;
504  if (left>right) break;
505  std::swap(ind[left], ind[right]); ++left; --right;
506  }
507  /* If either list is empty, it means that all remaining features
508  * are identical. Split in the middle to maintain a balanced tree.
509  */
510  lim1 = left;
511  right = count-1;
512  for (;; ) {
513  while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left;
514  while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right;
515  if (left>right) break;
516  std::swap(ind[left], ind[right]); ++left; --right;
517  }
518  lim2 = left;
519  }
520 
521  DistanceType computeInitialDistances(const ElementType* vec, std::vector<DistanceType>& dists)
522  {
523  DistanceType distsq = 0.0;
524 
525  for (size_t i = 0; i < dim_; ++i) {
526  if (vec[i] < root_bbox_[i].low) {
527  dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].low, (int)i);
528  distsq += dists[i];
529  }
530  if (vec[i] > root_bbox_[i].high) {
531  dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].high, (int)i);
532  distsq += dists[i];
533  }
534  }
535 
536  return distsq;
537  }
538 
542  void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq,
543  std::vector<DistanceType>& dists, const float epsError)
544  {
545  /* If this is a leaf node, then do check and return. */
546  if ((node->child1 == NULL)&&(node->child2 == NULL)) {
547  DistanceType worst_dist = result_set.worstDist();
548  for (int i=node->left; i<node->right; ++i) {
549  int index = reorder_ ? i : vind_[i];
550  DistanceType dist = distance_(vec, data_[index], dim_, worst_dist);
551  if (dist<worst_dist) {
552  result_set.addPoint(dist,vind_[i]);
553  }
554  }
555  return;
556  }
557 
558  /* Which child branch should be taken first? */
559  int idx = node->divfeat;
560  ElementType val = vec[idx];
561  DistanceType diff1 = val - node->divlow;
562  DistanceType diff2 = val - node->divhigh;
563 
564  NodePtr bestChild;
565  NodePtr otherChild;
566  DistanceType cut_dist;
567  if ((diff1+diff2)<0) {
568  bestChild = node->child1;
569  otherChild = node->child2;
570  cut_dist = distance_.accum_dist(val, node->divhigh, idx);
571  }
572  else {
573  bestChild = node->child2;
574  otherChild = node->child1;
575  cut_dist = distance_.accum_dist( val, node->divlow, idx);
576  }
577 
578  /* Call recursively to search next level down. */
579  searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError);
580 
581  DistanceType dst = dists[idx];
582  mindistsq = mindistsq + cut_dist - dst;
583  dists[idx] = cut_dist;
584  if (mindistsq*epsError<=result_set.worstDist()) {
585  searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError);
586  }
587  dists[idx] = dst;
588  }
589 
590 private:
591 
595  const Matrix<ElementType> dataset_;
596 
597  IndexParams index_params_;
598 
599  int leaf_max_size_;
600  bool reorder_;
601 
602 
606  std::vector<int> vind_;
607 
608  Matrix<ElementType> data_;
609 
610  size_t size_;
611  size_t dim_;
612 
616  NodePtr root_node_;
617 
618  BoundingBox root_bbox_;
619 
627  PooledAllocator pool_;
628 
629  Distance distance_;
630 }; // class KDTree
631 
632 }
633 
634 #endif //OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
Definition: defines.h:87
flann_algorithm_t
Definition: defines.h:81
IndexParams getParameters() const
Definition: kdtree_single_index.h:227
CvFileNode * node
Definition: core_c.h:1638
T get_param(const IndexParams &params, std::string name, const T &default_value)
Definition: params.h:59
int usedMemory
Definition: allocator.h:89
size_t cols
Definition: matrix.h:52
CvSize CvPoint2D32f int count
Definition: calib3d.hpp:221
int wastedMemory
Definition: allocator.h:90
KDTreeSingleIndex & operator=(const KDTreeSingleIndex &)
const int * idx
Definition: core_c.h:323
T * allocate(size_t count=1)
Definition: allocator.h:178
GLuint index
Definition: core_c.h:986
int d
Definition: legacy.hpp:3064
KDTreeSingleIndex(const Matrix< ElementType > &inputData, const IndexParams &params=KDTreeSingleIndexParams(), Distance d=Distance())
Definition: kdtree_single_index.h:84
void init(int *indices_, DistanceType *dists_)
Definition: result_set.h:98
KDTreeSingleIndexParams(int leaf_max_size=10, bool reorder=true, int dim=-1)
Definition: kdtree_single_index.h:53
~KDTreeSingleIndex()
Definition: kdtree_single_index.h:108
Distance::ResultType DistanceType
Definition: kdtree_single_index.h:74
const CvArr const CvArr CvArr * result
Definition: core_c.h:805
Definition: params.h:44
GLuint GLfloat * val
Definition: kdtree_single_index.h:70
GLuint GLuint GLsizei GLenum const GLvoid * indices
Definition: legacy.hpp:3084
void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams)
Definition: kdtree_single_index.h:241
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
computes per-element minimum of two arrays (dst = min(src1, src2))
Definition: kdtree_single_index.h:51
const CvMat CvMat CvMat int k
Definition: legacy.hpp:3052
GLuint GLuint GLsizei count
Definition: core_c.h:973
GLdouble left
void load_value(FILE *stream, T &value, size_t count=1)
Definition: saving.h:147
GLdouble GLdouble right
Definition: result_set.h:66
std::map< std::string, any > IndexParams
Definition: params.h:42
CvMat * dst
Definition: calib3d.hpp:76
void saveIndex(FILE *stream)
Saves the index to a stream.
Definition: kdtree_single_index.h:141
GLenum const GLfloat * params
Definition: compat.hpp:688
T * data
Definition: matrix.h:54
Definition: result_set.h:85
Distance::ElementType ElementType
Definition: kdtree_single_index.h:73
size_t veclen() const
Definition: kdtree_single_index.h:189
void knnSearch(const Matrix< ElementType > &queries, Matrix< int > &indices, Matrix< DistanceType > &dists, int knn, const SearchParams &params)
Perform k-nearest neighbor search.
Definition: kdtree_single_index.h:212
int usedMemory() const
Definition: kdtree_single_index.h:198
Definition: nn_index.h:48
size_t rows
Definition: matrix.h:51
GLuint dst
Definition: calib3d.hpp:134
::max::max int
Definition: functional.hpp:324
GLenum GLenum GLvoid GLvoid GLvoid * span
CV_EXPORTS void swap(Mat &a, Mat &b)
swaps two matrices
size_t size() const
Definition: kdtree_single_index.h:181
const CvArr * right
Definition: calib3d.hpp:353
void save_value(FILE *stream, const T &value, size_t count=1)
Definition: saving.h:126
GLclampf f
flann_algorithm_t getType() const
Definition: kdtree_single_index.h:135
void loadIndex(FILE *stream)
Loads the index from a stream.
Definition: kdtree_single_index.h:156
CvPoint3D64f double * dist
Definition: legacy.hpp:556
void buildIndex()
Definition: kdtree_single_index.h:116