00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #ifndef OPENCV_FLANN_KMEANS_INDEX_H_
00032 #define OPENCV_FLANN_KMEANS_INDEX_H_
00033
00034 #include <algorithm>
00035 #include <string>
00036 #include <map>
00037 #include <cassert>
00038 #include <limits>
00039 #include <cmath>
00040
00041 #include "general.h"
00042 #include "nn_index.h"
00043 #include "dist.h"
00044 #include "matrix.h"
00045 #include "result_set.h"
00046 #include "heap.h"
00047 #include "allocator.h"
00048 #include "random.h"
00049 #include "saving.h"
00050 #include "logger.h"
00051
00052
00053 namespace cvflann
00054 {
00055
00056 struct KMeansIndexParams : public IndexParams
00057 {
00058 KMeansIndexParams(int branching = 32, int iterations = 11,
00059 flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM, float cb_index = 0.2 )
00060 {
00061 (*this)["algorithm"] = FLANN_INDEX_KMEANS;
00062
00063 (*this)["branching"] = branching;
00064
00065 (*this)["iterations"] = iterations;
00066
00067 (*this)["centers_init"] = centers_init;
00068
00069 (*this)["cb_index"] = cb_index;
00070 }
00071 };
00072
00073
00080 template <typename Distance>
00081 class KMeansIndex : public NNIndex<Distance>
00082 {
00083 public:
/** Element type of the indexed vectors, as published by the distance functor. */
00084 typedef typename Distance::ElementType ElementType;
/** Type of the values returned by the distance functor. */
00085 typedef typename Distance::ResultType DistanceType;
00086
00087
00088
/**
 * Pointer-to-member type shared by the chooseCenters* routines:
 * (k, indices, indices_length, centers, centers_length).
 */
00089 typedef void (KMeansIndex::* centersAlgFunction)(int, int*, int, int*, int&);
00090
/** Center-selection routine invoked by computeClustering; assigned in the constructor. */
00094 centersAlgFunction chooseCenters;
00095
00096
00097
00108 void chooseCentersRandom(int k, int* indices, int indices_length, int* centers, int& centers_length)
00109 {
00110 UniqueRandom r(indices_length);
00111
00112 int index;
00113 for (index=0; index<k; ++index) {
00114 bool duplicate = true;
00115 int rnd;
00116 while (duplicate) {
00117 duplicate = false;
00118 rnd = r.next();
00119 if (rnd<0) {
00120 centers_length = index;
00121 return;
00122 }
00123
00124 centers[index] = indices[rnd];
00125
00126 for (int j=0; j<index; ++j) {
00127 DistanceType sq = distance_(dataset_[centers[index]], dataset_[centers[j]], dataset_.cols);
00128 if (sq<1e-16) {
00129 duplicate = true;
00130 }
00131 }
00132 }
00133 }
00134
00135 centers_length = index;
00136 }
00137
00138
00149 void chooseCentersGonzales(int k, int* indices, int indices_length, int* centers, int& centers_length)
00150 {
00151 int n = indices_length;
00152
00153 int rnd = rand_int(n);
00154 assert(rnd >=0 && rnd < n);
00155
00156 centers[0] = indices[rnd];
00157
00158 int index;
00159 for (index=1; index<k; ++index) {
00160
00161 int best_index = -1;
00162 DistanceType best_val = 0;
00163 for (int j=0; j<n; ++j) {
00164 DistanceType dist = distance_(dataset_[centers[0]],dataset_[indices[j]],dataset_.cols);
00165 for (int i=1; i<index; ++i) {
00166 DistanceType tmp_dist = distance_(dataset_[centers[i]],dataset_[indices[j]],dataset_.cols);
00167 if (tmp_dist<dist) {
00168 dist = tmp_dist;
00169 }
00170 }
00171 if (dist>best_val) {
00172 best_val = dist;
00173 best_index = j;
00174 }
00175 }
00176 if (best_index!=-1) {
00177 centers[index] = indices[best_index];
00178 }
00179 else {
00180 break;
00181 }
00182 }
00183 centers_length = index;
00184 }
00185
00186
00200 void chooseCentersKMeanspp(int k, int* indices, int indices_length, int* centers, int& centers_length)
00201 {
00202 int n = indices_length;
00203
00204 double currentPot = 0;
00205 DistanceType* closestDistSq = new DistanceType[n];
00206
00207
00208 int index = rand_int(n);
00209 assert(index >=0 && index < n);
00210 centers[0] = indices[index];
00211
00212 for (int i = 0; i < n; i++) {
00213 closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
00214 currentPot += closestDistSq[i];
00215 }
00216
00217
00218 const int numLocalTries = 1;
00219
00220
00221 int centerCount;
00222 for (centerCount = 1; centerCount < k; centerCount++) {
00223
00224
00225 double bestNewPot = -1;
00226 int bestNewIndex = -1;
00227 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
00228
00229
00230
00231 double randVal = rand_double(currentPot);
00232 for (index = 0; index < n-1; index++) {
00233 if (randVal <= closestDistSq[index]) break;
00234 else randVal -= closestDistSq[index];
00235 }
00236
00237
00238 double newPot = 0;
00239 for (int i = 0; i < n; i++) newPot += std::min( distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols), closestDistSq[i] );
00240
00241
00242 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
00243 bestNewPot = newPot;
00244 bestNewIndex = index;
00245 }
00246 }
00247
00248
00249 centers[centerCount] = indices[bestNewIndex];
00250 currentPot = bestNewPot;
00251 for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols), closestDistSq[i] );
00252 }
00253
00254 centers_length = centerCount;
00255
00256 delete[] closestDistSq;
00257 }
00258
00259
00260
00261 public:
00262
00263 flann_algorithm_t getType() const
00264 {
00265 return FLANN_INDEX_KMEANS;
00266 }
00267
00275 KMeansIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KMeansIndexParams(),
00276 Distance d = Distance())
00277 : dataset_(inputData), index_params_(params), root_(NULL), indices_(NULL), distance_(d)
00278 {
00279 memoryCounter_ = 0;
00280
00281 size_ = dataset_.rows;
00282 veclen_ = dataset_.cols;
00283
00284 branching_ = get_param(params,"branching",32);
00285 iterations_ = get_param(params,"iterations",11);
00286 if (iterations_<0) {
00287 iterations_ = (std::numeric_limits<int>::max)();
00288 }
00289 centers_init_ = get_param(params,"centers_init",FLANN_CENTERS_RANDOM);
00290
00291 if (centers_init_==FLANN_CENTERS_RANDOM) {
00292 chooseCenters = &KMeansIndex::chooseCentersRandom;
00293 }
00294 else if (centers_init_==FLANN_CENTERS_GONZALES) {
00295 chooseCenters = &KMeansIndex::chooseCentersGonzales;
00296 }
00297 else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
00298 chooseCenters = &KMeansIndex::chooseCentersKMeanspp;
00299 }
00300 else {
00301 throw FLANNException("Unknown algorithm for choosing initial centers.");
00302 }
00303 cb_index_ = 0.4f;
00304
00305 }
00306
00307
// Copy construction and copy assignment are declared here; no definition is
// visible in this file.
// NOTE(review): presumably left undefined on purpose so that copying an index
// (which owns root_ and indices_) fails at link time -- confirm.
00308 KMeansIndex(const KMeansIndex&);
00309 KMeansIndex& operator=(const KMeansIndex&);
00310
00311
00317 virtual ~KMeansIndex()
00318 {
00319 if (root_ != NULL) {
00320 free_centers(root_);
00321 }
00322 if (indices_!=NULL) {
00323 delete[] indices_;
00324 }
00325 }
00326
00330 size_t size() const
00331 {
00332 return size_;
00333 }
00334
00338 size_t veclen() const
00339 {
00340 return veclen_;
00341 }
00342
00343
00344 void set_cb_index( float index)
00345 {
00346 cb_index_ = index;
00347 }
00348
00353 int usedMemory() const
00354 {
00355 return pool_.usedMemory+pool_.wastedMemory+memoryCounter_;
00356 }
00357
00361 void buildIndex()
00362 {
00363 if (branching_<2) {
00364 throw FLANNException("Branching factor must be at least 2");
00365 }
00366
00367 indices_ = new int[size_];
00368 for (size_t i=0; i<size_; ++i) {
00369 indices_[i] = int(i);
00370 }
00371
00372 root_ = pool_.allocate<KMeansNode>();
00373 computeNodeStatistics(root_, indices_, (int)size_);
00374 computeClustering(root_, indices_, (int)size_, branching_,0);
00375 }
00376
00377
00378 void saveIndex(FILE* stream)
00379 {
00380 save_value(stream, branching_);
00381 save_value(stream, iterations_);
00382 save_value(stream, memoryCounter_);
00383 save_value(stream, cb_index_);
00384 save_value(stream, *indices_, (int)size_);
00385
00386 save_tree(stream, root_);
00387 }
00388
00389
00390 void loadIndex(FILE* stream)
00391 {
00392 load_value(stream, branching_);
00393 load_value(stream, iterations_);
00394 load_value(stream, memoryCounter_);
00395 load_value(stream, cb_index_);
00396 if (indices_!=NULL) {
00397 delete[] indices_;
00398 }
00399 indices_ = new int[size_];
00400 load_value(stream, *indices_, size_);
00401
00402 if (root_!=NULL) {
00403 free_centers(root_);
00404 }
00405 load_tree(stream, root_);
00406
00407 index_params_["algorithm"] = getType();
00408 index_params_["branching"] = branching_;
00409 index_params_["iterations"] = iterations_;
00410 index_params_["centers_init"] = centers_init_;
00411 index_params_["cb_index"] = cb_index_;
00412
00413 }
00414
00415
00425 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
00426 {
00427
00428 int maxChecks = get_param(searchParams,"checks",32);
00429
00430 if (maxChecks==FLANN_CHECKS_UNLIMITED) {
00431 findExactNN(root_, result, vec);
00432 }
00433 else {
00434
00435 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
00436
00437 int checks = 0;
00438 findNN(root_, result, vec, checks, maxChecks, heap);
00439
00440 BranchSt branch;
00441 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
00442 KMeansNodePtr node = branch.node;
00443 findNN(node, result, vec, checks, maxChecks, heap);
00444 }
00445 assert(result.full());
00446
00447 delete heap;
00448 }
00449
00450 }
00451
00459 int getClusterCenters(Matrix<DistanceType>& centers)
00460 {
00461 int numClusters = centers.rows;
00462 if (numClusters<1) {
00463 throw FLANNException("Number of clusters must be at least 1");
00464 }
00465
00466 DistanceType variance;
00467 KMeansNodePtr* clusters = new KMeansNodePtr[numClusters];
00468
00469 int clusterCount = getMinVarianceClusters(root_, clusters, numClusters, variance);
00470
00471 Logger::info("Clusters requested: %d, returning %d\n",numClusters, clusterCount);
00472
00473 for (int i=0; i<clusterCount; ++i) {
00474 DistanceType* center = clusters[i]->pivot;
00475 for (size_t j=0; j<veclen_; ++j) {
00476 centers[i][j] = center[j];
00477 }
00478 }
00479 delete[] clusters;
00480
00481 return clusterCount;
00482 }
00483
00484 IndexParams getParameters() const
00485 {
00486 return index_params_;
00487 }
00488
00489
00490 private:
/**
 * One node of the k-means tree.
 *
 * The struct is written and read as a raw value by save_tree/load_tree, so
 * its member order and types are part of the on-disk format.
 */
00494 struct KMeansNode
00495 {
/** Center vector of the node (veclen_ entries); released by free_centers. */
00499 DistanceType* pivot;
/** Largest distance_ value between the pivot and a point assigned to the node. */
00503 DistanceType radius;
/** Average of sqrt(distance_(point, zero vector)) over the node's points; set for child nodes in computeClustering. */
00507 DistanceType mean_radius;
/** Mean of distance_(point, zero vector) over the node's points minus distance_(pivot, zero vector). */
00511 DistanceType variance;
/** Number of points that belong to the node. */
00515 int size;
/** Array of branching_ child pointers, or NULL for a leaf. */
00519 KMeansNode** childs;
/** For a leaf: start of the node's slice of the index permutation array. */
00523 int* indices;
/** Depth of the node; the root is built with level 0. */
00527 int level;
00528 };
/** Convenience pointer alias used throughout the class. */
00529 typedef KMeansNode* KMeansNodePtr;
00530
/** Heap entry pairing an unexplored node with the key it is ordered by. */
00534 typedef BranchStruct<KMeansNodePtr, DistanceType> BranchSt;
00535
00536
00537
00538
00539 void save_tree(FILE* stream, KMeansNodePtr node)
00540 {
00541 save_value(stream, *node);
00542 save_value(stream, *(node->pivot), (int)veclen_);
00543 if (node->childs==NULL) {
00544 int indices_offset = (int)(node->indices - indices_);
00545 save_value(stream, indices_offset);
00546 }
00547 else {
00548 for(int i=0; i<branching_; ++i) {
00549 save_tree(stream, node->childs[i]);
00550 }
00551 }
00552 }
00553
00554
00555 void load_tree(FILE* stream, KMeansNodePtr& node)
00556 {
00557 node = pool_.allocate<KMeansNode>();
00558 load_value(stream, *node);
00559 node->pivot = new DistanceType[veclen_];
00560 load_value(stream, *(node->pivot), (int)veclen_);
00561 if (node->childs==NULL) {
00562 int indices_offset;
00563 load_value(stream, indices_offset);
00564 node->indices = indices_ + indices_offset;
00565 }
00566 else {
00567 node->childs = pool_.allocate<KMeansNodePtr>(branching_);
00568 for(int i=0; i<branching_; ++i) {
00569 load_tree(stream, node->childs[i]);
00570 }
00571 }
00572 }
00573
00574
00578 void free_centers(KMeansNodePtr node)
00579 {
00580 delete[] node->pivot;
00581 if (node->childs!=NULL) {
00582 for (int k=0; k<branching_; ++k) {
00583 free_centers(node->childs[k]);
00584 }
00585 }
00586 }
00587
00595 void computeNodeStatistics(KMeansNodePtr node, int* indices, int indices_length)
00596 {
00597
00598 DistanceType radius = 0;
00599 DistanceType variance = 0;
00600 DistanceType* mean = new DistanceType[veclen_];
00601 memoryCounter_ += int(veclen_*sizeof(DistanceType));
00602
00603 memset(mean,0,veclen_*sizeof(DistanceType));
00604
00605 for (size_t i=0; i<size_; ++i) {
00606 ElementType* vec = dataset_[indices[i]];
00607 for (size_t j=0; j<veclen_; ++j) {
00608 mean[j] += vec[j];
00609 }
00610 variance += distance_(vec, ZeroIterator<ElementType>(), veclen_);
00611 }
00612 for (size_t j=0; j<veclen_; ++j) {
00613 mean[j] /= size_;
00614 }
00615 variance /= size_;
00616 variance -= distance_(mean, ZeroIterator<ElementType>(), veclen_);
00617
00618 DistanceType tmp = 0;
00619 for (int i=0; i<indices_length; ++i) {
00620 tmp = distance_(mean, dataset_[indices[i]], veclen_);
00621 if (tmp>radius) {
00622 radius = tmp;
00623 }
00624 }
00625
00626 node->variance = variance;
00627 node->radius = radius;
00628 node->pivot = mean;
00629 }
00630
00631
/**
 * Recursively splits the points of a node into `branching` clusters with
 * k-means and creates one child node per cluster.
 *
 * The slice `indices[0 .. indices_length)` is reordered in place so that the
 * points of each child are contiguous. The node becomes a leaf (childs ==
 * NULL, indices sorted) when it holds fewer than `branching` points or when
 * the center-selection routine returns fewer than `branching` centers.
 *
 * @param node            node to split; its size, level, childs (and, for a
 *                        leaf, indices) are set here
 * @param indices         dataset row indices of the node's points
 * @param indices_length  number of entries in indices
 * @param branching       number of clusters / children to create
 * @param level           depth recorded in the node
 */
00643 void computeClustering(KMeansNodePtr node, int* indices, int indices_length, int branching, int level)
00644 {
00645 node->size = indices_length;
00646 node->level = level;
00647
// Too few points to form `branching` clusters: make this node a leaf.
00648 if (indices_length < branching) {
00649 node->indices = indices;
00650 std::sort(node->indices,node->indices+indices_length);
00651 node->childs = NULL;
00652 return;
00653 }
00654
// Ask the configured selection routine for the initial centers.
00655 int* centers_idx = new int[branching];
00656 int centers_length;
00657 (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
00658
// Not enough distinct centers could be found: make this node a leaf as well.
00659 if (centers_length<branching) {
00660 node->indices = indices;
00661 std::sort(node->indices,node->indices+indices_length);
00662 node->childs = NULL;
00663 delete [] centers_idx;
00664 return;
00665 }
00666
00667
// Working copy of the centers in double precision (branching x veclen_).
00668 Matrix<double> dcenters(new double[branching*veclen_],branching,veclen_);
00669 for (int i=0; i<centers_length; ++i) {
00670 ElementType* vec = dataset_[centers_idx[i]];
00671 for (size_t k=0; k<veclen_; ++k) {
00672 dcenters[i][k] = double(vec[k]);
00673 }
00674 }
00675 delete[] centers_idx;
00676
// Per-cluster largest point-to-center distance and per-cluster point count.
00677 std::vector<DistanceType> radiuses(branching);
00678 int* count = new int[branching];
00679 for (int i=0; i<branching; ++i) {
00680 radiuses[i] = 0;
00681 count[i] = 0;
00682 }
00683
00684
// Initial assignment: each point goes to its nearest center; on ties the
// lower-numbered center wins because the comparison is strict.
00685 int* belongs_to = new int[indices_length];
00686 for (int i=0; i<indices_length; ++i) {
00687
00688 DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
00689 belongs_to[i] = 0;
00690 for (int j=1; j<branching; ++j) {
00691 DistanceType new_sq_dist = distance_(dataset_[indices[i]], dcenters[j], veclen_);
00692 if (sq_dist>new_sq_dist) {
00693 belongs_to[i] = j;
00694 sq_dist = new_sq_dist;
00695 }
00696 }
00697 if (sq_dist>radiuses[belongs_to[i]]) {
00698 radiuses[belongs_to[i]] = sq_dist;
00699 }
00700 count[belongs_to[i]]++;
00701 }
00702
// Iterate until no point changes cluster or iterations_ rounds have run.
00703 bool converged = false;
00704 int iteration = 0;
00705 while (!converged && iteration<iterations_) {
00706 converged = true;
00707 iteration++;
00708
00709
// Recompute every center as the mean of the points currently assigned to it.
00710 for (int i=0; i<branching; ++i) {
00711 memset(dcenters[i],0,sizeof(double)*veclen_);
00712 radiuses[i] = 0;
00713 }
00714 for (int i=0; i<indices_length; ++i) {
00715 ElementType* vec = dataset_[indices[i]];
00716 double* center = dcenters[belongs_to[i]];
00717 for (size_t k=0; k<veclen_; ++k) {
00718 center[k] += vec[k];
00719 }
00720 }
00721 for (int i=0; i<branching; ++i) {
00722 int cnt = count[i];
00723 for (size_t k=0; k<veclen_; ++k) {
00724 dcenters[i][k] /= cnt;
00725 }
00726 }
00727
00728
// Reassign every point to its nearest center and refresh the radii.
00729 for (int i=0; i<indices_length; ++i) {
00730 DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
00731 int new_centroid = 0;
00732 for (int j=1; j<branching; ++j) {
00733 DistanceType new_sq_dist = distance_(dataset_[indices[i]], dcenters[j], veclen_);
00734 if (sq_dist>new_sq_dist) {
00735 new_centroid = j;
00736 sq_dist = new_sq_dist;
00737 }
00738 }
00739 if (sq_dist>radiuses[new_centroid]) {
00740 radiuses[new_centroid] = sq_dist;
00741 }
00742 if (new_centroid != belongs_to[i]) {
00743 count[belongs_to[i]]--;
00744 count[new_centroid]++;
00745 belongs_to[i] = new_centroid;
00746
00747 converged = false;
00748 }
00749 }
00750
// Repair empty clusters: move one point over from the next cluster (in
// cyclic order) that holds more than one point, and force another round.
00751 for (int i=0; i<branching; ++i) {
00752
00753
00754 if (count[i]==0) {
00755 int j = (i+1)%branching;
00756 while (count[j]<=1) {
00757 j = (j+1)%branching;
00758 }
00759
00760 for (int k=0; k<indices_length; ++k) {
00761 if (belongs_to[k]==j) {
00762 belongs_to[k] = i;
00763 count[j]--;
00764 count[i]++;
00765 break;
00766 }
00767 }
00768 converged = false;
00769 }
00770 }
00771
00772 }
00773
// Convert the final centers to DistanceType; each array becomes a child's
// pivot and is later released through free_centers.
00774 DistanceType** centers = new DistanceType*[branching];
00775
00776 for (int i=0; i<branching; ++i) {
00777 centers[i] = new DistanceType[veclen_];
00778 memoryCounter_ += (int)(veclen_*sizeof(DistanceType));
00779 for (size_t k=0; k<veclen_; ++k) {
00780 centers[i][k] = (DistanceType)dcenters[i][k];
00781 }
00782 }
00783
00784
00785
// Partition `indices` cluster by cluster: points of cluster c are swapped
// into [start, end), then the child for that range is created and split.
00786 node->childs = pool_.allocate<KMeansNodePtr>(branching);
00787 int start = 0;
00788 int end = start;
00789 for (int c=0; c<branching; ++c) {
00790 int s = count[c];
00791
00792 DistanceType variance = 0;
00793 DistanceType mean_radius =0;
00794 for (int i=0; i<indices_length; ++i) {
00795 if (belongs_to[i]==c) {
00796 DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
00797 variance += d;
00798 mean_radius += sqrt(d);
// Keep indices and belongs_to in step while moving the point into place.
00799 std::swap(indices[i],indices[end]);
00800 std::swap(belongs_to[i],belongs_to[end]);
00801 end++;
00802 }
00803 }
00804 variance /= s;
00805 mean_radius /= s;
00806 variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);
00807
00808 node->childs[c] = pool_.allocate<KMeansNode>();
00809 node->childs[c]->radius = radiuses[c];
00810 node->childs[c]->pivot = centers[c];
00811 node->childs[c]->variance = variance;
00812 node->childs[c]->mean_radius = mean_radius;
00813 node->childs[c]->indices = NULL;
00814 computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
00815 start=end;
00816 }
00817
// Only the temporaries are freed; centers[c] arrays now belong to the children.
00818 delete[] dcenters.data;
00819 delete[] centers;
00820 delete[] count;
00821 delete[] belongs_to;
00822 }
00823
00824
00825
00839 void findNN(KMeansNodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks,
00840 Heap<BranchSt>* heap)
00841 {
00842
00843 {
00844 DistanceType bsq = distance_(vec, node->pivot, veclen_);
00845 DistanceType rsq = node->radius;
00846 DistanceType wsq = result.worstDist();
00847
00848 DistanceType val = bsq-rsq-wsq;
00849 DistanceType val2 = val*val-4*rsq*wsq;
00850
00851
00852 if ((val>0)&&(val2>0)) {
00853 return;
00854 }
00855 }
00856
00857 if (node->childs==NULL) {
00858 if (checks>=maxChecks) {
00859 if (result.full()) return;
00860 }
00861 checks += node->size;
00862 for (int i=0; i<node->size; ++i) {
00863 int index = node->indices[i];
00864 DistanceType dist = distance_(dataset_[index], vec, veclen_);
00865 result.addPoint(dist, index);
00866 }
00867 }
00868 else {
00869 DistanceType* domain_distances = new DistanceType[branching_];
00870 int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
00871 delete[] domain_distances;
00872 findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
00873 }
00874 }
00875
00884 int exploreNodeBranches(KMeansNodePtr node, const ElementType* q, DistanceType* domain_distances, Heap<BranchSt>* heap)
00885 {
00886
00887 int best_index = 0;
00888 domain_distances[best_index] = distance_(q, node->childs[best_index]->pivot, veclen_);
00889 for (int i=1; i<branching_; ++i) {
00890 domain_distances[i] = distance_(q, node->childs[i]->pivot, veclen_);
00891 if (domain_distances[i]<domain_distances[best_index]) {
00892 best_index = i;
00893 }
00894 }
00895
00896
00897 for (int i=0; i<branching_; ++i) {
00898 if (i != best_index) {
00899 domain_distances[i] -= cb_index_*node->childs[i]->variance;
00900
00901
00902
00903
00904
00905 heap->insert(BranchSt(node->childs[i],domain_distances[i]));
00906 }
00907 }
00908
00909 return best_index;
00910 }
00911
00912
00916 void findExactNN(KMeansNodePtr node, ResultSet<DistanceType>& result, const ElementType* vec)
00917 {
00918
00919 {
00920 DistanceType bsq = distance_(vec, node->pivot, veclen_);
00921 DistanceType rsq = node->radius;
00922 DistanceType wsq = result.worstDist();
00923
00924 DistanceType val = bsq-rsq-wsq;
00925 DistanceType val2 = val*val-4*rsq*wsq;
00926
00927
00928 if ((val>0)&&(val2>0)) {
00929 return;
00930 }
00931 }
00932
00933
00934 if (node->childs==NULL) {
00935 for (int i=0; i<node->size; ++i) {
00936 int index = node->indices[i];
00937 DistanceType dist = distance_(dataset_[index], vec, veclen_);
00938 result.addPoint(dist, index);
00939 }
00940 }
00941 else {
00942 int* sort_indices = new int[branching_];
00943
00944 getCenterOrdering(node, vec, sort_indices);
00945
00946 for (int i=0; i<branching_; ++i) {
00947 findExactNN(node->childs[sort_indices[i]],result,vec);
00948 }
00949
00950 delete[] sort_indices;
00951 }
00952 }
00953
00954
00960 void getCenterOrdering(KMeansNodePtr node, const ElementType* q, int* sort_indices)
00961 {
00962 DistanceType* domain_distances = new DistanceType[branching_];
00963 for (int i=0; i<branching_; ++i) {
00964 DistanceType dist = distance_(q, node->childs[i]->pivot, veclen_);
00965
00966 int j=0;
00967 while (domain_distances[j]<dist && j<i) j++;
00968 for (int k=i; k>j; --k) {
00969 domain_distances[k] = domain_distances[k-1];
00970 sort_indices[k] = sort_indices[k-1];
00971 }
00972 domain_distances[j] = dist;
00973 sort_indices[j] = i;
00974 }
00975 delete[] domain_distances;
00976 }
00977
00983 DistanceType getDistanceToBorder(DistanceType* p, DistanceType* c, DistanceType* q)
00984 {
00985 DistanceType sum = 0;
00986 DistanceType sum2 = 0;
00987
00988 for (int i=0; i<veclen_; ++i) {
00989 DistanceType t = c[i]-p[i];
00990 sum += t*(q[i]-(c[i]+p[i])/2);
00991 sum2 += t*t;
00992 }
00993
00994 return sum*sum/sum2;
00995 }
00996
00997
01007 int getMinVarianceClusters(KMeansNodePtr root, KMeansNodePtr* clusters, int clusters_length, DistanceType& varianceValue)
01008 {
01009 int clusterCount = 1;
01010 clusters[0] = root;
01011
01012 DistanceType meanVariance = root->variance*root->size;
01013
01014 while (clusterCount<clusters_length) {
01015 DistanceType minVariance = (std::numeric_limits<DistanceType>::max)();
01016 int splitIndex = -1;
01017
01018 for (int i=0; i<clusterCount; ++i) {
01019 if (clusters[i]->childs != NULL) {
01020
01021 DistanceType variance = meanVariance - clusters[i]->variance*clusters[i]->size;
01022
01023 for (int j=0; j<branching_; ++j) {
01024 variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
01025 }
01026 if (variance<minVariance) {
01027 minVariance = variance;
01028 splitIndex = i;
01029 }
01030 }
01031 }
01032
01033 if (splitIndex==-1) break;
01034 if ( (branching_+clusterCount-1) > clusters_length) break;
01035
01036 meanVariance = minVariance;
01037
01038
01039 KMeansNodePtr toSplit = clusters[splitIndex];
01040 clusters[splitIndex] = toSplit->childs[0];
01041 for (int i=1; i<branching_; ++i) {
01042 clusters[clusterCount++] = toSplit->childs[i];
01043 }
01044 }
01045
01046 varianceValue = meanVariance/root->size;
01047 return clusterCount;
01048 }
01049
01050 private:
/** Number of children of every internal node; must be at least 2 to build. */
01052 int branching_;
01053
/** Upper bound on k-means rounds per node; a negative parameter is mapped to INT_MAX. */
01055 int iterations_;
01056
/** Which algorithm picks the initial cluster centers. */
01058 flann_centers_init_t centers_init_;
01059
/**
 * Weight applied to a child's variance when unexplored branches are ranked
 * in exploreNodeBranches; adjustable through set_cb_index().
 */
01066 float cb_index_;
01067
/** The matrix of vectors being indexed, as passed to the constructor. */
01071 const Matrix<ElementType> dataset_;
01072
/** Parameters the index was created with; refreshed by loadIndex. */
01074 IndexParams index_params_;
01075
/** Number of vectors in the dataset (dataset_.rows). */
01079 size_t size_;
01080
/** Length of each vector (dataset_.cols). */
01084 size_t veclen_;
01085
/** Root of the k-means tree, or NULL before buildIndex/loadIndex. */
01089 KMeansNodePtr root_;
01090
/** Permutation of dataset row indices (size_ entries); leaves point into it. */
01094 int* indices_;
01095
/** Distance functor. */
01099 Distance distance_;
01100
/** Allocator from which tree nodes and child-pointer arrays are taken. */
01104 PooledAllocator pool_;
01105
/** Bytes allocated for pivot vectors outside the pool; part of usedMemory(). */
01109 int memoryCounter_;
01110 };
01111
01112 }
01113
01114 #endif //OPENCV_FLANN_KMEANS_INDEX_H_