kmeans_index.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 #ifndef OPENCV_FLANN_KMEANS_INDEX_H_
32 #define OPENCV_FLANN_KMEANS_INDEX_H_
33 
34 #include <algorithm>
35 #include <string>
36 #include <map>
37 #include <cassert>
38 #include <limits>
39 #include <cmath>
40 
41 #include "general.h"
42 #include "nn_index.h"
43 #include "dist.h"
44 #include "matrix.h"
45 #include "result_set.h"
46 #include "heap.h"
47 #include "allocator.h"
48 #include "random.h"
49 #include "saving.h"
50 #include "logger.h"
51 
52 
53 namespace cvflann
54 {
55 
57 {
58  KMeansIndexParams(int branching = 32, int iterations = 11,
59  flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM, float cb_index = 0.2 )
60  {
61  (*this)["algorithm"] = FLANN_INDEX_KMEANS;
62  // branching factor
63  (*this)["branching"] = branching;
64  // max iterations to perform in one kmeans clustering (kmeans tree)
65  (*this)["iterations"] = iterations;
66  // algorithm used for picking the initial cluster centers for kmeans tree
67  (*this)["centers_init"] = centers_init;
68  // cluster boundary index. Used when searching the kmeans tree
69  (*this)["cb_index"] = cb_index;
70  }
71 };
72 
73 
80 template <typename Distance>
81 class KMeansIndex : public NNIndex<Distance>
82 {
83 public:
84  typedef typename Distance::ElementType ElementType;
85  typedef typename Distance::ResultType DistanceType;
86 
87 
88 
89  typedef void (KMeansIndex::* centersAlgFunction)(int, int*, int, int*, int&);
90 
95 
96 
97 
108  void chooseCentersRandom(int k, int* indices, int indices_length, int* centers, int& centers_length)
109  {
110  UniqueRandom r(indices_length);
111 
112  int index;
113  for (index=0; index<k; ++index) {
114  bool duplicate = true;
115  int rnd;
116  while (duplicate) {
117  duplicate = false;
118  rnd = r.next();
119  if (rnd<0) {
120  centers_length = index;
121  return;
122  }
123 
124  centers[index] = indices[rnd];
125 
126  for (int j=0; j<index; ++j) {
127  DistanceType sq = distance_(dataset_[centers[index]], dataset_[centers[j]], dataset_.cols);
128  if (sq<1e-16) {
129  duplicate = true;
130  }
131  }
132  }
133  }
134 
135  centers_length = index;
136  }
137 
138 
149  void chooseCentersGonzales(int k, int* indices, int indices_length, int* centers, int& centers_length)
150  {
151  int n = indices_length;
152 
153  int rnd = rand_int(n);
154  assert(rnd >=0 && rnd < n);
155 
156  centers[0] = indices[rnd];
157 
158  int index;
159  for (index=1; index<k; ++index) {
160 
161  int best_index = -1;
162  DistanceType best_val = 0;
163  for (int j=0; j<n; ++j) {
164  DistanceType dist = distance_(dataset_[centers[0]],dataset_[indices[j]],dataset_.cols);
165  for (int i=1; i<index; ++i) {
166  DistanceType tmp_dist = distance_(dataset_[centers[i]],dataset_[indices[j]],dataset_.cols);
167  if (tmp_dist<dist) {
168  dist = tmp_dist;
169  }
170  }
171  if (dist>best_val) {
172  best_val = dist;
173  best_index = j;
174  }
175  }
176  if (best_index!=-1) {
177  centers[index] = indices[best_index];
178  }
179  else {
180  break;
181  }
182  }
183  centers_length = index;
184  }
185 
186 
200  void chooseCentersKMeanspp(int k, int* indices, int indices_length, int* centers, int& centers_length)
201  {
202  int n = indices_length;
203 
204  double currentPot = 0;
205  DistanceType* closestDistSq = new DistanceType[n];
206 
207  // Choose one random center and set the closestDistSq values
208  int index = rand_int(n);
209  assert(index >=0 && index < n);
210  centers[0] = indices[index];
211 
212  for (int i = 0; i < n; i++) {
213  closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
214  currentPot += closestDistSq[i];
215  }
216 
217 
218  const int numLocalTries = 1;
219 
220  // Choose each center
221  int centerCount;
222  for (centerCount = 1; centerCount < k; centerCount++) {
223 
224  // Repeat several trials
225  double bestNewPot = -1;
226  int bestNewIndex = -1;
227  for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
228 
229  // Choose our center - have to be slightly careful to return a valid answer even accounting
230  // for possible rounding errors
231  double randVal = rand_double(currentPot);
232  for (index = 0; index < n-1; index++) {
233  if (randVal <= closestDistSq[index]) break;
234  else randVal -= closestDistSq[index];
235  }
236 
237  // Compute the new potential
238  double newPot = 0;
239  for (int i = 0; i < n; i++) newPot += std::min( distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols), closestDistSq[i] );
240 
241  // Store the best result
242  if ((bestNewPot < 0)||(newPot < bestNewPot)) {
243  bestNewPot = newPot;
244  bestNewIndex = index;
245  }
246  }
247 
248  // Add the appropriate center
249  centers[centerCount] = indices[bestNewIndex];
250  currentPot = bestNewPot;
251  for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols), closestDistSq[i] );
252  }
253 
254  centers_length = centerCount;
255 
256  delete[] closestDistSq;
257  }
258 
259 
260 
261 public:
262 
264  {
265  return FLANN_INDEX_KMEANS;
266  }
267 
276  Distance d = Distance())
277  : dataset_(inputData), index_params_(params), root_(NULL), indices_(NULL), distance_(d)
278  {
279  memoryCounter_ = 0;
280 
281  size_ = dataset_.rows;
282  veclen_ = dataset_.cols;
283 
284  branching_ = get_param(params,"branching",32);
285  iterations_ = get_param(params,"iterations",11);
286  if (iterations_<0) {
287  iterations_ = (std::numeric_limits<int>::max)();
288  }
289  centers_init_ = get_param(params,"centers_init",FLANN_CENTERS_RANDOM);
290 
291  if (centers_init_==FLANN_CENTERS_RANDOM) {
293  }
294  else if (centers_init_==FLANN_CENTERS_GONZALES) {
296  }
297  else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
299  }
300  else {
301  throw FLANNException("Unknown algorithm for choosing initial centers.");
302  }
303  cb_index_ = 0.4f;
304 
305  }
306 
307 
308  KMeansIndex(const KMeansIndex&);
310 
311 
317  virtual ~KMeansIndex()
318  {
319  if (root_ != NULL) {
320  free_centers(root_);
321  }
322  if (indices_!=NULL) {
323  delete[] indices_;
324  }
325  }
326 
330  size_t size() const
331  {
332  return size_;
333  }
334 
338  size_t veclen() const
339  {
340  return veclen_;
341  }
342 
343 
344  void set_cb_index( float index)
345  {
346  cb_index_ = index;
347  }
348 
353  int usedMemory() const
354  {
355  return pool_.usedMemory+pool_.wastedMemory+memoryCounter_;
356  }
357 
361  void buildIndex()
362  {
363  if (branching_<2) {
364  throw FLANNException("Branching factor must be at least 2");
365  }
366 
367  indices_ = new int[size_];
368  for (size_t i=0; i<size_; ++i) {
369  indices_[i] = int(i);
370  }
371 
372  root_ = pool_.allocate<KMeansNode>();
373  computeNodeStatistics(root_, indices_, (int)size_);
374  computeClustering(root_, indices_, (int)size_, branching_,0);
375  }
376 
377 
378  void saveIndex(FILE* stream)
379  {
380  save_value(stream, branching_);
381  save_value(stream, iterations_);
382  save_value(stream, memoryCounter_);
383  save_value(stream, cb_index_);
384  save_value(stream, *indices_, (int)size_);
385 
386  save_tree(stream, root_);
387  }
388 
389 
390  void loadIndex(FILE* stream)
391  {
392  load_value(stream, branching_);
393  load_value(stream, iterations_);
394  load_value(stream, memoryCounter_);
395  load_value(stream, cb_index_);
396  if (indices_!=NULL) {
397  delete[] indices_;
398  }
399  indices_ = new int[size_];
400  load_value(stream, *indices_, size_);
401 
402  if (root_!=NULL) {
403  free_centers(root_);
404  }
405  load_tree(stream, root_);
406 
407  index_params_["algorithm"] = getType();
408  index_params_["branching"] = branching_;
409  index_params_["iterations"] = iterations_;
410  index_params_["centers_init"] = centers_init_;
411  index_params_["cb_index"] = cb_index_;
412 
413  }
414 
415 
425  void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
426  {
427 
428  int maxChecks = get_param(searchParams,"checks",32);
429 
430  if (maxChecks==FLANN_CHECKS_UNLIMITED) {
431  findExactNN(root_, result, vec);
432  }
433  else {
434  // Priority queue storing intermediate branches in the best-bin-first search
435  Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
436 
437  int checks = 0;
438  findNN(root_, result, vec, checks, maxChecks, heap);
439 
440  BranchSt branch;
441  while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
442  KMeansNodePtr node = branch.node;
443  findNN(node, result, vec, checks, maxChecks, heap);
444  }
445  assert(result.full());
446 
447  delete heap;
448  }
449 
450  }
451 
460  {
461  int numClusters = centers.rows;
462  if (numClusters<1) {
463  throw FLANNException("Number of clusters must be at least 1");
464  }
465 
466  DistanceType variance;
467  KMeansNodePtr* clusters = new KMeansNodePtr[numClusters];
468 
469  int clusterCount = getMinVarianceClusters(root_, clusters, numClusters, variance);
470 
471  Logger::info("Clusters requested: %d, returning %d\n",numClusters, clusterCount);
472 
473  for (int i=0; i<clusterCount; ++i) {
474  DistanceType* center = clusters[i]->pivot;
475  for (size_t j=0; j<veclen_; ++j) {
476  centers[i][j] = center[j];
477  }
478  }
479  delete[] clusters;
480 
481  return clusterCount;
482  }
483 
485  {
486  return index_params_;
487  }
488 
489 
490 private:
494  struct KMeansNode
495  {
499  DistanceType* pivot;
507  DistanceType mean_radius;
511  DistanceType variance;
515  int size;
519  KMeansNode** childs;
523  int* indices;
527  int level;
528  };
529  typedef KMeansNode* KMeansNodePtr;
530 
534  typedef BranchStruct<KMeansNodePtr, DistanceType> BranchSt;
535 
536 
537 
538 
539  void save_tree(FILE* stream, KMeansNodePtr node)
540  {
541  save_value(stream, *node);
542  save_value(stream, *(node->pivot), (int)veclen_);
543  if (node->childs==NULL) {
544  int indices_offset = (int)(node->indices - indices_);
545  save_value(stream, indices_offset);
546  }
547  else {
548  for(int i=0; i<branching_; ++i) {
549  save_tree(stream, node->childs[i]);
550  }
551  }
552  }
553 
554 
555  void load_tree(FILE* stream, KMeansNodePtr& node)
556  {
557  node = pool_.allocate<KMeansNode>();
558  load_value(stream, *node);
559  node->pivot = new DistanceType[veclen_];
560  load_value(stream, *(node->pivot), (int)veclen_);
561  if (node->childs==NULL) {
562  int indices_offset;
563  load_value(stream, indices_offset);
564  node->indices = indices_ + indices_offset;
565  }
566  else {
567  node->childs = pool_.allocate<KMeansNodePtr>(branching_);
568  for(int i=0; i<branching_; ++i) {
569  load_tree(stream, node->childs[i]);
570  }
571  }
572  }
573 
574 
578  void free_centers(KMeansNodePtr node)
579  {
580  delete[] node->pivot;
581  if (node->childs!=NULL) {
582  for (int k=0; k<branching_; ++k) {
583  free_centers(node->childs[k]);
584  }
585  }
586  }
587 
595  void computeNodeStatistics(KMeansNodePtr node, int* indices, int indices_length)
596  {
597 
598  DistanceType radius = 0;
599  DistanceType variance = 0;
600  DistanceType* mean = new DistanceType[veclen_];
601  memoryCounter_ += int(veclen_*sizeof(DistanceType));
602 
603  memset(mean,0,veclen_*sizeof(DistanceType));
604 
605  for (size_t i=0; i<size_; ++i) {
606  ElementType* vec = dataset_[indices[i]];
607  for (size_t j=0; j<veclen_; ++j) {
608  mean[j] += vec[j];
609  }
610  variance += distance_(vec, ZeroIterator<ElementType>(), veclen_);
611  }
612  for (size_t j=0; j<veclen_; ++j) {
613  mean[j] /= size_;
614  }
615  variance /= size_;
616  variance -= distance_(mean, ZeroIterator<ElementType>(), veclen_);
617 
618  DistanceType tmp = 0;
619  for (int i=0; i<indices_length; ++i) {
620  tmp = distance_(mean, dataset_[indices[i]], veclen_);
621  if (tmp>radius) {
622  radius = tmp;
623  }
624  }
625 
626  node->variance = variance;
627  node->radius = radius;
628  node->pivot = mean;
629  }
630 
631 
643  void computeClustering(KMeansNodePtr node, int* indices, int indices_length, int branching, int level)
644  {
645  node->size = indices_length;
646  node->level = level;
647 
648  if (indices_length < branching) {
649  node->indices = indices;
650  std::sort(node->indices,node->indices+indices_length);
651  node->childs = NULL;
652  return;
653  }
654 
655  int* centers_idx = new int[branching];
656  int centers_length;
657  (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
658 
659  if (centers_length<branching) {
660  node->indices = indices;
661  std::sort(node->indices,node->indices+indices_length);
662  node->childs = NULL;
663  delete [] centers_idx;
664  return;
665  }
666 
667 
668  Matrix<double> dcenters(new double[branching*veclen_],branching,veclen_);
669  for (int i=0; i<centers_length; ++i) {
670  ElementType* vec = dataset_[centers_idx[i]];
671  for (size_t k=0; k<veclen_; ++k) {
672  dcenters[i][k] = double(vec[k]);
673  }
674  }
675  delete[] centers_idx;
676 
677  std::vector<DistanceType> radiuses(branching);
678  int* count = new int[branching];
679  for (int i=0; i<branching; ++i) {
680  radiuses[i] = 0;
681  count[i] = 0;
682  }
683 
684  // assign points to clusters
685  int* belongs_to = new int[indices_length];
686  for (int i=0; i<indices_length; ++i) {
687 
688  DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
689  belongs_to[i] = 0;
690  for (int j=1; j<branching; ++j) {
691  DistanceType new_sq_dist = distance_(dataset_[indices[i]], dcenters[j], veclen_);
692  if (sq_dist>new_sq_dist) {
693  belongs_to[i] = j;
694  sq_dist = new_sq_dist;
695  }
696  }
697  if (sq_dist>radiuses[belongs_to[i]]) {
698  radiuses[belongs_to[i]] = sq_dist;
699  }
700  count[belongs_to[i]]++;
701  }
702 
703  bool converged = false;
704  int iteration = 0;
705  while (!converged && iteration<iterations_) {
706  converged = true;
707  iteration++;
708 
709  // compute the new cluster centers
710  for (int i=0; i<branching; ++i) {
711  memset(dcenters[i],0,sizeof(double)*veclen_);
712  radiuses[i] = 0;
713  }
714  for (int i=0; i<indices_length; ++i) {
715  ElementType* vec = dataset_[indices[i]];
716  double* center = dcenters[belongs_to[i]];
717  for (size_t k=0; k<veclen_; ++k) {
718  center[k] += vec[k];
719  }
720  }
721  for (int i=0; i<branching; ++i) {
722  int cnt = count[i];
723  for (size_t k=0; k<veclen_; ++k) {
724  dcenters[i][k] /= cnt;
725  }
726  }
727 
728  // reassign points to clusters
729  for (int i=0; i<indices_length; ++i) {
730  DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
731  int new_centroid = 0;
732  for (int j=1; j<branching; ++j) {
733  DistanceType new_sq_dist = distance_(dataset_[indices[i]], dcenters[j], veclen_);
734  if (sq_dist>new_sq_dist) {
735  new_centroid = j;
736  sq_dist = new_sq_dist;
737  }
738  }
739  if (sq_dist>radiuses[new_centroid]) {
740  radiuses[new_centroid] = sq_dist;
741  }
742  if (new_centroid != belongs_to[i]) {
743  count[belongs_to[i]]--;
744  count[new_centroid]++;
745  belongs_to[i] = new_centroid;
746 
747  converged = false;
748  }
749  }
750 
751  for (int i=0; i<branching; ++i) {
752  // if one cluster converges to an empty cluster,
753  // move an element into that cluster
754  if (count[i]==0) {
755  int j = (i+1)%branching;
756  while (count[j]<=1) {
757  j = (j+1)%branching;
758  }
759 
760  for (int k=0; k<indices_length; ++k) {
761  if (belongs_to[k]==j) {
762  // for cluster j, we move the furthest element from the center to the empty cluster i
763  if ( distance_(dataset_[indices[k]], dcenters[j], veclen_) == radiuses[j] ) {
764  belongs_to[k] = i;
765  count[j]--;
766  count[i]++;
767  break;
768  }
769  }
770  }
771  converged = false;
772  }
773  }
774 
775  }
776 
777  DistanceType** centers = new DistanceType*[branching];
778 
779  for (int i=0; i<branching; ++i) {
780  centers[i] = new DistanceType[veclen_];
781  memoryCounter_ += (int)(veclen_*sizeof(DistanceType));
782  for (size_t k=0; k<veclen_; ++k) {
783  centers[i][k] = (DistanceType)dcenters[i][k];
784  }
785  }
786 
787 
788  // compute kmeans clustering for each of the resulting clusters
789  node->childs = pool_.allocate<KMeansNodePtr>(branching);
790  int start = 0;
791  int end = start;
792  for (int c=0; c<branching; ++c) {
793  int s = count[c];
794 
795  DistanceType variance = 0;
796  DistanceType mean_radius =0;
797  for (int i=0; i<indices_length; ++i) {
798  if (belongs_to[i]==c) {
799  DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
800  variance += d;
801  mean_radius += sqrt(d);
802  std::swap(indices[i],indices[end]);
803  std::swap(belongs_to[i],belongs_to[end]);
804  end++;
805  }
806  }
807  variance /= s;
808  mean_radius /= s;
809  variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);
810 
811  node->childs[c] = pool_.allocate<KMeansNode>();
812  node->childs[c]->radius = radiuses[c];
813  node->childs[c]->pivot = centers[c];
814  node->childs[c]->variance = variance;
815  node->childs[c]->mean_radius = mean_radius;
816  node->childs[c]->indices = NULL;
817  computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
818  start=end;
819  }
820 
821  delete[] dcenters.data;
822  delete[] centers;
823  delete[] count;
824  delete[] belongs_to;
825  }
826 
827 
828 
842  void findNN(KMeansNodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks,
843  Heap<BranchSt>* heap)
844  {
845  // Ignore those clusters that are too far away
846  {
847  DistanceType bsq = distance_(vec, node->pivot, veclen_);
848  DistanceType rsq = node->radius;
849  DistanceType wsq = result.worstDist();
850 
851  DistanceType val = bsq-rsq-wsq;
852  DistanceType val2 = val*val-4*rsq*wsq;
853 
854  //if (val>0) {
855  if ((val>0)&&(val2>0)) {
856  return;
857  }
858  }
859 
860  if (node->childs==NULL) {
861  if (checks>=maxChecks) {
862  if (result.full()) return;
863  }
864  checks += node->size;
865  for (int i=0; i<node->size; ++i) {
866  int index = node->indices[i];
867  DistanceType dist = distance_(dataset_[index], vec, veclen_);
868  result.addPoint(dist, index);
869  }
870  }
871  else {
872  DistanceType* domain_distances = new DistanceType[branching_];
873  int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
874  delete[] domain_distances;
875  findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
876  }
877  }
878 
887  int exploreNodeBranches(KMeansNodePtr node, const ElementType* q, DistanceType* domain_distances, Heap<BranchSt>* heap)
888  {
889 
890  int best_index = 0;
891  domain_distances[best_index] = distance_(q, node->childs[best_index]->pivot, veclen_);
892  for (int i=1; i<branching_; ++i) {
893  domain_distances[i] = distance_(q, node->childs[i]->pivot, veclen_);
894  if (domain_distances[i]<domain_distances[best_index]) {
895  best_index = i;
896  }
897  }
898 
899  // float* best_center = node->childs[best_index]->pivot;
900  for (int i=0; i<branching_; ++i) {
901  if (i != best_index) {
902  domain_distances[i] -= cb_index_*node->childs[i]->variance;
903 
904  // float dist_to_border = getDistanceToBorder(node.childs[i].pivot,best_center,q);
905  // if (domain_distances[i]<dist_to_border) {
906  // domain_distances[i] = dist_to_border;
907  // }
908  heap->insert(BranchSt(node->childs[i],domain_distances[i]));
909  }
910  }
911 
912  return best_index;
913  }
914 
915 
919  void findExactNN(KMeansNodePtr node, ResultSet<DistanceType>& result, const ElementType* vec)
920  {
921  // Ignore those clusters that are too far away
922  {
923  DistanceType bsq = distance_(vec, node->pivot, veclen_);
924  DistanceType rsq = node->radius;
925  DistanceType wsq = result.worstDist();
926 
927  DistanceType val = bsq-rsq-wsq;
928  DistanceType val2 = val*val-4*rsq*wsq;
929 
930  // if (val>0) {
931  if ((val>0)&&(val2>0)) {
932  return;
933  }
934  }
935 
936 
937  if (node->childs==NULL) {
938  for (int i=0; i<node->size; ++i) {
939  int index = node->indices[i];
940  DistanceType dist = distance_(dataset_[index], vec, veclen_);
941  result.addPoint(dist, index);
942  }
943  }
944  else {
945  int* sort_indices = new int[branching_];
946 
947  getCenterOrdering(node, vec, sort_indices);
948 
949  for (int i=0; i<branching_; ++i) {
950  findExactNN(node->childs[sort_indices[i]],result,vec);
951  }
952 
953  delete[] sort_indices;
954  }
955  }
956 
957 
963  void getCenterOrdering(KMeansNodePtr node, const ElementType* q, int* sort_indices)
964  {
965  DistanceType* domain_distances = new DistanceType[branching_];
966  for (int i=0; i<branching_; ++i) {
967  DistanceType dist = distance_(q, node->childs[i]->pivot, veclen_);
968 
969  int j=0;
970  while (domain_distances[j]<dist && j<i) j++;
971  for (int k=i; k>j; --k) {
972  domain_distances[k] = domain_distances[k-1];
973  sort_indices[k] = sort_indices[k-1];
974  }
975  domain_distances[j] = dist;
976  sort_indices[j] = i;
977  }
978  delete[] domain_distances;
979  }
980 
986  DistanceType getDistanceToBorder(DistanceType* p, DistanceType* c, DistanceType* q)
987  {
988  DistanceType sum = 0;
989  DistanceType sum2 = 0;
990 
991  for (int i=0; i<veclen_; ++i) {
992  DistanceType t = c[i]-p[i];
993  sum += t*(q[i]-(c[i]+p[i])/2);
994  sum2 += t*t;
995  }
996 
997  return sum*sum/sum2;
998  }
999 
1000 
1010  int getMinVarianceClusters(KMeansNodePtr root, KMeansNodePtr* clusters, int clusters_length, DistanceType& varianceValue)
1011  {
1012  int clusterCount = 1;
1013  clusters[0] = root;
1014 
1015  DistanceType meanVariance = root->variance*root->size;
1016 
1017  while (clusterCount<clusters_length) {
1018  DistanceType minVariance = (std::numeric_limits<DistanceType>::max)();
1019  int splitIndex = -1;
1020 
1021  for (int i=0; i<clusterCount; ++i) {
1022  if (clusters[i]->childs != NULL) {
1023 
1024  DistanceType variance = meanVariance - clusters[i]->variance*clusters[i]->size;
1025 
1026  for (int j=0; j<branching_; ++j) {
1027  variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
1028  }
1029  if (variance<minVariance) {
1030  minVariance = variance;
1031  splitIndex = i;
1032  }
1033  }
1034  }
1035 
1036  if (splitIndex==-1) break;
1037  if ( (branching_+clusterCount-1) > clusters_length) break;
1038 
1039  meanVariance = minVariance;
1040 
1041  // split node
1042  KMeansNodePtr toSplit = clusters[splitIndex];
1043  clusters[splitIndex] = toSplit->childs[0];
1044  for (int i=1; i<branching_; ++i) {
1045  clusters[clusterCount++] = toSplit->childs[i];
1046  }
1047  }
1048 
1049  varianceValue = meanVariance/root->size;
1050  return clusterCount;
1051  }
1052 
1053 private:
1055  int branching_;
1056 
1058  int iterations_;
1059 
1061  flann_centers_init_t centers_init_;
1062 
1069  float cb_index_;
1070 
1074  const Matrix<ElementType> dataset_;
1075 
1077  IndexParams index_params_;
1078 
1082  size_t size_;
1083 
1087  size_t veclen_;
1088 
1092  KMeansNodePtr root_;
1093 
1097  int* indices_;
1098 
1102  Distance distance_;
1103 
1107  PooledAllocator pool_;
1108 
1112  int memoryCounter_;
1113 };
1114 
1115 }
1116 
1117 #endif //OPENCV_FLANN_KMEANS_INDEX_H_
void buildIndex()
Definition: kmeans_index.h:361
flann_algorithm_t
Definition: defines.h:81
virtual ~KMeansIndex()
Definition: kmeans_index.h:317
CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst)
computes square root of each matrix element (dst = src**0.5)
CvFileNode * node
Definition: core_c.h:1638
T get_param(const IndexParams &params, std::string name, const T &default_value)
Definition: params.h:59
int usedMemory
Definition: allocator.h:89
GLint level
Definition: tracking.hpp:88
size_t cols
Definition: matrix.h:52
GLuint start
CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags)
sorts independently each matrix row or each matrix column
Definition: defines.h:108
Definition: kmeans_index.h:81
CvSize CvPoint2D32f int count
Definition: calib3d.hpp:221
int wastedMemory
Definition: allocator.h:90
double rand_double(double high=1.0, double low=0)
Definition: random.h:61
virtual bool full() const =0
Definition: kmeans_index.h:56
T * allocate(size_t count=1)
Definition: allocator.h:178
void chooseCentersRandom(int k, int *indices, int indices_length, int *centers, int &centers_length)
Definition: kmeans_index.h:108
Distance::ResultType DistanceType
Definition: kmeans_index.h:85
GLuint index
Definition: core_c.h:986
void saveIndex(FILE *stream)
Saves the index to a stream.
Definition: kmeans_index.h:378
KMeansIndexParams(int branching=32, int iterations=11, flann_centers_init_t centers_init=FLANN_CENTERS_RANDOM, float cb_index=0.2)
Definition: kmeans_index.h:58
size_t size() const
Definition: kmeans_index.h:330
Definition: general.h:41
int d
Definition: legacy.hpp:3064
void chooseCentersKMeanspp(int k, int *indices, int indices_length, int *centers, int &centers_length)
Definition: kmeans_index.h:200
CvRect r
Definition: core_c.h:1282
CvPoint center
Definition: core_c.h:1290
int usedMemory() const
Definition: kmeans_index.h:353
OutputArray sum
Definition: imgproc.hpp:620
IndexParams getParameters() const
Definition: kmeans_index.h:484
Definition: defines.h:85
T node
Definition: result_set.h:52
const CvArr const CvArr CvArr * result
Definition: core_c.h:805
typedef void(CV_CDECL *CvMouseCallback)(int event
KMeansIndex & operator=(const KMeansIndex &)
Definition: params.h:44
double start
Definition: core_c.h:774
void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams)
Definition: kmeans_index.h:425
GLuint GLfloat * val
IplImage CvMemStorage CvSeq int level
Definition: legacy.hpp:2917
GLuint GLuint GLsizei GLenum const GLvoid * indices
Definition: legacy.hpp:3084
int getClusterCenters(Matrix< DistanceType > &centers)
Definition: kmeans_index.h:459
void set_cb_index(float index)
Definition: kmeans_index.h:344
GLdouble GLdouble GLdouble GLdouble q
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
computes per-element minimum of two arrays (dst = min(src1, src2))
int index
Definition: core_c.h:309
const CvMat CvMat CvMat int k
Definition: legacy.hpp:3052
flann_algorithm_t getType() const
Definition: kmeans_index.h:263
GLuint GLuint GLsizei count
Definition: core_c.h:973
GLenum GLsizei n
Definition: random.h:81
CvPoint2D32f float float float c
Definition: legacy.hpp:578
void load_value(FILE *stream, T &value, size_t count=1)
Definition: saving.h:147
const CvArr CvArr double int int int iterations
Definition: tracking.hpp:102
flann_centers_init_t
Definition: defines.h:105
void chooseCentersGonzales(int k, int *indices, int indices_length, int *centers, int &centers_length)
Definition: kmeans_index.h:149
const CvMat CvMat * indices
Definition: legacy.hpp:3052
Definition: result_set.h:66
size_t veclen() const
Definition: kmeans_index.h:338
CvArr * mean
Definition: core_c.h:802
GLuint GLuint end
std::map< std::string, any > IndexParams
Definition: params.h:42
int next()
Definition: random.h:120
GLfloat GLfloat p
GLenum const GLfloat * params
Definition: compat.hpp:688
const GLubyte * c
Definition: legacy.hpp:633
Definition: defines.h:170
void(KMeansIndex::* centersAlgFunction)(int, int *, int, int *, int &)
Definition: kmeans_index.h:89
void loadIndex(FILE *stream)
Loads the index from a stream.
Definition: kmeans_index.h:390
Definition: nn_index.h:48
int n
Definition: legacy.hpp:3070
size_t rows
Definition: matrix.h:51
::max::max int
Definition: functional.hpp:324
centersAlgFunction chooseCenters
Definition: kmeans_index.h:94
KMeansIndex(const Matrix< ElementType > &inputData, const IndexParams &params=KMeansIndexParams(), Distance d=Distance())
Definition: kmeans_index.h:275
double double end
Definition: core_c.h:774
Definition: heap.h:48
CV_EXPORTS void swap(Mat &a, Mat &b)
swaps two matrices
Definition: defines.h:107
GLdouble GLdouble t
void save_value(FILE *stream, const T &value, size_t count=1)
Definition: saving.h:126
GLdouble s
CvPoint int radius
Definition: core_c.h:1290
Distance::ElementType ElementType
Definition: kmeans_index.h:84
bool popMin(T &value)
Definition: heap.h:147
int rand_int(int high=RAND_MAX, int low=0)
Definition: random.h:72
Definition: defines.h:109
CvPoint3D64f double * dist
Definition: legacy.hpp:556
Definition: result_set.h:50
short float uchar uchar uchar uchar uchar ushort int uchar ushort int float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float int int int float int int int float int CV_CUDEV_IMPLEMENT_VEC_BINARY_OP char CV_CUDEV_IMPLEMENT_VEC_BINARY_OP ushort CV_CUDEV_IMPLEMENT_VEC_BINARY_OP short CV_CUDEV_IMPLEMENT_VEC_BINARY_OP int CV_CUDEV_IMPLEMENT_VEC_BINARY_OP uint CV_CUDEV_IMPLEMENT_VEC_BINARY_OP float CV_CUDEV_IMPLEMENT_VEC_BINARY_OP double int int uint double
Definition: vec_math.hpp:432