00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #ifndef _OPENCV_KMEANSTREE_H_
00032 #define _OPENCV_KMEANSTREE_H_
00033
00034 #include <algorithm>
00035 #include <string>
00036 #include <map>
00037 #include <cassert>
00038 #include <limits>
00039 #include <cmath>
00040
00041 #include "opencv2/flann/general.h"
00042 #include "opencv2/flann/nn_index.h"
00043 #include "opencv2/flann/matrix.h"
00044 #include "opencv2/flann/result_set.h"
00045 #include "opencv2/flann/heap.h"
00046 #include "opencv2/flann/allocator.h"
00047 #include "opencv2/flann/random.h"
00048
00049
00050 namespace cvflann
00051 {
00052
00053 struct CV_EXPORTS KMeansIndexParams : public IndexParams {
00054 KMeansIndexParams(int branching_ = 32, int iterations_ = 11,
00055 flann_centers_init_t centers_init_ = FLANN_CENTERS_RANDOM, float cb_index_ = 0.2 ) :
00056 IndexParams(FLANN_INDEX_KMEANS),
00057 branching(branching_),
00058 iterations(iterations_),
00059 centers_init(centers_init_),
00060 cb_index(cb_index_) {};
00061
00062 int branching;
00063 int iterations;
00064 flann_centers_init_t centers_init;
00065 float cb_index;
00066
00067 void print() const
00068 {
00069 logger().info("Index type: %d\n",(int)algorithm);
00070 logger().info("Branching: %d\n", branching);
00071 logger().info("Iterations: %d\n", iterations);
00072 logger().info("Centres initialisation: %d\n", centers_init);
00073 logger().info("Cluster boundary weight: %g\n", cb_index);
00074 }
00075
00076 };
00077
00078
00085 template <typename ELEM_TYPE, typename DIST_TYPE = typename DistType<ELEM_TYPE>::type >
00086 class KMeansIndex : public NNIndex<ELEM_TYPE>
00087 {
00088
00092 int branching;
00093
00098 int max_iter;
00099
00106 float cb_index;
00107
00111 const Matrix<ELEM_TYPE> dataset;
00112
00113 const IndexParams& index_params;
00114
00118 size_t size_;
00119
00123 size_t veclen_;
00124
00125
00129 struct KMeansNodeSt {
00133 DIST_TYPE* pivot;
00137 DIST_TYPE radius;
00141 DIST_TYPE mean_radius;
00145 DIST_TYPE variance;
00149 int size;
00153 KMeansNodeSt** childs;
00157 int* indices;
00161 int level;
00162 };
00163 typedef KMeansNodeSt* KMeansNode;
00164
00165
00166
00170 typedef BranchStruct<KMeansNode> BranchSt;
00171
00172
00176 KMeansNode root;
00177
00181 int* indices;
00182
00183
00191 PooledAllocator pool;
00192
00196 int memoryCounter;
00197
00198
00199 typedef void (KMeansIndex::*centersAlgFunction)(int, int*, int, int*, int&);
00200
00204 centersAlgFunction chooseCenters;
00205
00206
00207
00218 void chooseCentersRandom(int k, int* indices, int indices_length, int* centers, int& centers_length)
00219 {
00220 UniqueRandom r(indices_length);
00221
00222 int index;
00223 for (index=0;index<k;++index) {
00224 bool duplicate = true;
00225 int rnd;
00226 while (duplicate) {
00227 duplicate = false;
00228 rnd = r.next();
00229 if (rnd<0) {
00230 centers_length = index;
00231 return;
00232 }
00233
00234 centers[index] = indices[rnd];
00235
00236 for (int j=0;j<index;++j) {
00237 float sq = (float)flann_dist(dataset[centers[index]],dataset[centers[index]]+dataset.cols,dataset[centers[j]]);
00238 if (sq<1e-16) {
00239 duplicate = true;
00240 }
00241 }
00242 }
00243 }
00244
00245 centers_length = index;
00246 }
00247
00248
00259 void chooseCentersGonzales(int k, int* indices, int indices_length, int* centers, int& centers_length)
00260 {
00261 int n = indices_length;
00262
00263 int rnd = rand_int(n);
00264 assert(rnd >=0 && rnd < n);
00265
00266 centers[0] = indices[rnd];
00267
00268 int index;
00269 for (index=1; index<k; ++index) {
00270
00271 int best_index = -1;
00272 float best_val = 0;
00273 for (int j=0;j<n;++j) {
00274 float dist = (float)flann_dist(dataset[centers[0]],dataset[centers[0]]+dataset.cols,dataset[indices[j]]);
00275 for (int i=1;i<index;++i) {
00276 float tmp_dist = (float)flann_dist(dataset[centers[i]],dataset[centers[i]]+dataset.cols,dataset[indices[j]]);
00277 if (tmp_dist<dist) {
00278 dist = tmp_dist;
00279 }
00280 }
00281 if (dist>best_val) {
00282 best_val = dist;
00283 best_index = j;
00284 }
00285 }
00286 if (best_index!=-1) {
00287 centers[index] = indices[best_index];
00288 }
00289 else {
00290 break;
00291 }
00292 }
00293 centers_length = index;
00294 }
00295
00296
00310 void chooseCentersKMeanspp(int k, int* indices, int indices_length, int* centers, int& centers_length)
00311 {
00312 int n = indices_length;
00313
00314 double currentPot = 0;
00315 double* closestDistSq = new double[n];
00316
00317
00318 int index = rand_int(n);
00319 assert(index >=0 && index < n);
00320 centers[0] = indices[index];
00321
00322 for (int i = 0; i < n; i++) {
00323 closestDistSq[i] = flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]);
00324 currentPot += closestDistSq[i];
00325 }
00326
00327
00328 const int numLocalTries = 1;
00329
00330
00331 int centerCount;
00332 for (centerCount = 1; centerCount < k; centerCount++) {
00333
00334
00335 double bestNewPot = -1;
00336 int bestNewIndex = -1;
00337 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
00338
00339
00340
00341 double randVal = rand_double(currentPot);
00342 for (index = 0; index < n-1; index++) {
00343 if (randVal <= closestDistSq[index])
00344 break;
00345 else
00346 randVal -= closestDistSq[index];
00347 }
00348
00349
00350 double newPot = 0;
00351 for (int i = 0; i < n; i++)
00352 newPot += std::min( flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]), closestDistSq[i] );
00353
00354
00355 if (bestNewPot < 0 || newPot < bestNewPot) {
00356 bestNewPot = newPot;
00357 bestNewIndex = index;
00358 }
00359 }
00360
00361
00362 centers[centerCount] = indices[bestNewIndex];
00363 currentPot = bestNewPot;
00364 for (int i = 0; i < n; i++)
00365 closestDistSq[i] = std::min( flann_dist(dataset[indices[i]], dataset[indices[i]]+dataset.cols, dataset[indices[bestNewIndex]]), closestDistSq[i] );
00366 }
00367
00368 centers_length = centerCount;
00369
00370 delete[] closestDistSq;
00371 }
00372
00373
00374
00375 public:
00376
00377
00378 flann_algorithm_t getType() const
00379 {
00380 return FLANN_INDEX_KMEANS;
00381 }
00382
00390 KMeansIndex(const Matrix<ELEM_TYPE>& inputData, const KMeansIndexParams& params = KMeansIndexParams() )
00391 : dataset(inputData), index_params(params), root(NULL), indices(NULL)
00392 {
00393 memoryCounter = 0;
00394
00395 size_ = dataset.rows;
00396 veclen_ = dataset.cols;
00397
00398 branching = params.branching;
00399 max_iter = params.iterations;
00400 if (max_iter<0) {
00401 max_iter = (std::numeric_limits<int>::max)();
00402 }
00403 flann_centers_init_t centersInit = params.centers_init;
00404
00405 if (centersInit==FLANN_CENTERS_RANDOM) {
00406 chooseCenters = &KMeansIndex::chooseCentersRandom;
00407 }
00408 else if (centersInit==FLANN_CENTERS_GONZALES) {
00409 chooseCenters = &KMeansIndex::chooseCentersGonzales;
00410 }
00411 else if (centersInit==FLANN_CENTERS_KMEANSPP) {
00412 chooseCenters = &KMeansIndex::chooseCentersKMeanspp;
00413 }
00414 else {
00415 throw FLANNException("Unknown algorithm for choosing initial centers.");
00416 }
00417 cb_index = 0.4f;
00418
00419 }
00420
00421
00427 virtual ~KMeansIndex()
00428 {
00429 if (root != NULL) {
00430 free_centers(root);
00431 }
00432 if (indices!=NULL) {
00433 delete[] indices;
00434 }
00435 }
00436
00440 size_t size() const
00441 {
00442 return size_;
00443 }
00444
00448 size_t veclen() const
00449 {
00450 return veclen_;
00451 }
00452
00453
00454 void set_cb_index( float index)
00455 {
00456 cb_index = index;
00457 }
00458
00459
00464 int usedMemory() const
00465 {
00466 return pool.usedMemory+pool.wastedMemory+memoryCounter;
00467 }
00468
00472 void buildIndex()
00473 {
00474 if (branching<2) {
00475 throw FLANNException("Branching factor must be at least 2");
00476 }
00477
00478 indices = new int[size_];
00479 for (size_t i=0;i<size_;++i) {
00480 indices[i] = (int)i;
00481 }
00482
00483 root = pool.allocate<KMeansNodeSt>();
00484 computeNodeStatistics(root, indices, (int)size_);
00485 computeClustering(root, indices, (int)size_, branching,0);
00486 }
00487
00488
00489 void saveIndex(FILE* stream)
00490 {
00491 save_value(stream, branching);
00492 save_value(stream, max_iter);
00493 save_value(stream, memoryCounter);
00494 save_value(stream, cb_index);
00495 save_value(stream, *indices, (int)size_);
00496
00497 save_tree(stream, root);
00498 }
00499
00500
00501 void loadIndex(FILE* stream)
00502 {
00503 load_value(stream, branching);
00504 load_value(stream, max_iter);
00505 load_value(stream, memoryCounter);
00506 load_value(stream, cb_index);
00507 if (indices!=NULL) {
00508 delete[] indices;
00509 }
00510 indices = new int[size_];
00511 load_value(stream, *indices, (int)size_);
00512
00513 if (root!=NULL) {
00514 free_centers(root);
00515 }
00516 load_tree(stream, root);
00517 }
00518
00519
00529 void findNeighbors(ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, const SearchParams& searchParams)
00530 {
00531
00532 int maxChecks = searchParams.checks;
00533
00534 if (maxChecks<0) {
00535 findExactNN(root, result, vec);
00536 }
00537 else {
00538
00539 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
00540
00541 int checks = 0;
00542
00543 findNN(root, result, vec, checks, maxChecks, heap);
00544
00545 BranchSt branch;
00546 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
00547 KMeansNode node = branch.node;
00548 findNN(node, result, vec, checks, maxChecks, heap);
00549 }
00550 assert(result.full());
00551
00552 delete heap;
00553 }
00554
00555 }
00556
00557
00565 int getClusterCenters(Matrix<DIST_TYPE>& centers)
00566 {
00567 int numClusters = centers.rows;
00568 if (numClusters<1) {
00569 throw FLANNException("Number of clusters must be at least 1");
00570 }
00571
00572 float variance;
00573 KMeansNode* clusters = new KMeansNode[numClusters];
00574
00575 int clusterCount = getMinVarianceClusters(root, clusters, numClusters, variance);
00576
00577
00578
00579
00580 for (int i=0;i<clusterCount;++i) {
00581 DIST_TYPE* center = clusters[i]->pivot;
00582 for (size_t j=0;j<veclen_;++j) {
00583 centers[i][j] = center[j];
00584 }
00585 }
00586 delete[] clusters;
00587
00588 return clusterCount;
00589 }
00590
00591 const IndexParams* getParameters() const
00592 {
00593 return &index_params;
00594 }
00595
00596
00597 private:
00598
00599 KMeansIndex& operator=(const KMeansIndex&);
00600 KMeansIndex(const KMeansIndex&);
00601
00602 void save_tree(FILE* stream, KMeansNode node)
00603 {
00604 save_value(stream, *node);
00605 save_value(stream, *(node->pivot), (int)veclen_);
00606 if (node->childs==NULL) {
00607 int indices_offset = (int)(node->indices - indices);
00608 save_value(stream, indices_offset);
00609 }
00610 else {
00611 for(int i=0; i<branching; ++i) {
00612 save_tree(stream, node->childs[i]);
00613 }
00614 }
00615 }
00616
00617
00618 void load_tree(FILE* stream, KMeansNode& node)
00619 {
00620 node = pool.allocate<KMeansNodeSt>();
00621 load_value(stream, *node);
00622 node->pivot = new DIST_TYPE[veclen_];
00623 load_value(stream, *(node->pivot), (int)veclen_);
00624 if (node->childs==NULL) {
00625 int indices_offset;
00626 load_value(stream, indices_offset);
00627 node->indices = indices + indices_offset;
00628 }
00629 else {
00630 node->childs = pool.allocate<KMeansNode>(branching);
00631 for(int i=0; i<branching; ++i) {
00632 load_tree(stream, node->childs[i]);
00633 }
00634 }
00635 }
00636
00637
00641 void free_centers(KMeansNode node)
00642 {
00643 delete[] node->pivot;
00644 if (node->childs!=NULL) {
00645 for (int k=0;k<branching;++k) {
00646 free_centers(node->childs[k]);
00647 }
00648 }
00649 }
00650
00658 void computeNodeStatistics(KMeansNode node, int* indices, int indices_length) {
00659
00660 double radius = 0;
00661 double variance = 0;
00662 DIST_TYPE* mean = new DIST_TYPE[veclen_];
00663 memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00664
00665 memset(mean,0,veclen_*sizeof(float));
00666
00667 for (size_t i=0;i<size_;++i) {
00668 ELEM_TYPE* vec = dataset[indices[i]];
00669 for (size_t j=0;j<veclen_;++j) {
00670 mean[j] += vec[j];
00671 }
00672 variance += flann_dist(vec,vec+veclen_,zero());
00673 }
00674 for (size_t j=0;j<veclen_;++j) {
00675 mean[j] /= size_;
00676 }
00677 variance /= size_;
00678 variance -= flann_dist(mean,mean+veclen_,zero());
00679
00680 double tmp = 0;
00681 for (int i=0;i<indices_length;++i) {
00682 tmp = flann_dist(mean, mean + veclen_, dataset[indices[i]]);
00683 if (tmp>radius) {
00684 radius = tmp;
00685 }
00686 }
00687
00688 node->variance = (DIST_TYPE)variance;
00689 node->radius = (DIST_TYPE)radius;
00690 node->pivot = mean;
00691 }
00692
00693
00705 void computeClustering(KMeansNode node, int* indices, int indices_length, int branching, int level)
00706 {
00707 node->size = indices_length;
00708 node->level = level;
00709
00710 if (indices_length < branching) {
00711 node->indices = indices;
00712 std::sort(node->indices,node->indices+indices_length);
00713 node->childs = NULL;
00714 return;
00715 }
00716
00717 int* centers_idx = new int[branching];
00718 int centers_length;
00719 (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
00720
00721 if (centers_length<branching) {
00722 node->indices = indices;
00723 std::sort(node->indices,node->indices+indices_length);
00724 node->childs = NULL;
00725 return;
00726 }
00727
00728
00729 Matrix<double> dcenters(new double[branching*veclen_],branching,(long)veclen_);
00730 for (int i=0; i<centers_length; ++i) {
00731 ELEM_TYPE* vec = dataset[centers_idx[i]];
00732 for (size_t k=0; k<veclen_; ++k) {
00733 dcenters[i][k] = double(vec[k]);
00734 }
00735 }
00736 delete[] centers_idx;
00737
00738 float* radiuses = new float[branching];
00739 int* count = new int[branching];
00740 for (int i=0;i<branching;++i) {
00741 radiuses[i] = 0;
00742 count[i] = 0;
00743 }
00744
00745
00746 int* belongs_to = new int[indices_length];
00747 for (int i=0;i<indices_length;++i) {
00748
00749 double sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]] + veclen_ ,dcenters[0]);
00750 belongs_to[i] = 0;
00751 for (int j=1;j<branching;++j) {
00752 double new_sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_, dcenters[j]);
00753 if (sq_dist>new_sq_dist) {
00754 belongs_to[i] = j;
00755 sq_dist = new_sq_dist;
00756 }
00757 }
00758 if (sq_dist>radiuses[belongs_to[i]]) {
00759 radiuses[belongs_to[i]] = (float)sq_dist;
00760 }
00761 count[belongs_to[i]]++;
00762 }
00763
00764 bool converged = false;
00765 int iteration = 0;
00766 while (!converged && iteration<max_iter) {
00767 converged = true;
00768 iteration++;
00769
00770
00771 for (int i=0;i<branching;++i) {
00772 memset(dcenters[i],0,sizeof(double)*veclen_);
00773 radiuses[i] = 0;
00774 }
00775 for (int i=0;i<indices_length;++i) {
00776 ELEM_TYPE* vec = dataset[indices[i]];
00777 double* center = dcenters[belongs_to[i]];
00778 for (size_t k=0;k<veclen_;++k) {
00779 center[k] += vec[k];
00780 }
00781 }
00782 for (int i=0;i<branching;++i) {
00783 int cnt = count[i];
00784 for (size_t k=0;k<veclen_;++k) {
00785 dcenters[i][k] /= cnt;
00786 }
00787 }
00788
00789
00790 for (int i=0;i<indices_length;++i) {
00791 float sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_ ,dcenters[0]);
00792 int new_centroid = 0;
00793 for (int j=1;j<branching;++j) {
00794 float new_sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_,dcenters[j]);
00795 if (sq_dist>new_sq_dist) {
00796 new_centroid = j;
00797 sq_dist = new_sq_dist;
00798 }
00799 }
00800 if (sq_dist>radiuses[new_centroid]) {
00801 radiuses[new_centroid] = sq_dist;
00802 }
00803 if (new_centroid != belongs_to[i]) {
00804 count[belongs_to[i]]--;
00805 count[new_centroid]++;
00806 belongs_to[i] = new_centroid;
00807
00808 converged = false;
00809 }
00810 }
00811
00812 for (int i=0;i<branching;++i) {
00813
00814
00815 if (count[i]==0) {
00816 int j = (i+1)%branching;
00817 while (count[j]<=1) {
00818 j = (j+1)%branching;
00819 }
00820
00821 for (int k=0;k<indices_length;++k) {
00822 if (belongs_to[k]==j) {
00823 belongs_to[k] = i;
00824 count[j]--;
00825 count[i]++;
00826 break;
00827 }
00828 }
00829 converged = false;
00830 }
00831 }
00832
00833 }
00834
00835 DIST_TYPE** centers = new DIST_TYPE*[branching];
00836
00837 for (int i=0; i<branching; ++i) {
00838 centers[i] = new DIST_TYPE[veclen_];
00839 memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00840 for (size_t k=0; k<veclen_; ++k) {
00841 centers[i][k] = (DIST_TYPE)dcenters[i][k];
00842 }
00843 }
00844
00845
00846
00847 node->childs = pool.allocate<KMeansNode>(branching);
00848 int start = 0;
00849 int end = start;
00850 for (int c=0;c<branching;++c) {
00851 int s = count[c];
00852
00853 double variance = 0;
00854 double mean_radius =0;
00855 for (int i=0;i<indices_length;++i) {
00856 if (belongs_to[i]==c) {
00857 double d = flann_dist(dataset[indices[i]],dataset[indices[i]]+veclen_,zero());
00858 variance += d;
00859 mean_radius += sqrt(d);
00860 std::swap(indices[i],indices[end]);
00861 std::swap(belongs_to[i],belongs_to[end]);
00862 end++;
00863 }
00864 }
00865 variance /= s;
00866 mean_radius /= s;
00867 variance -= flann_dist(centers[c],centers[c]+veclen_,zero());
00868
00869 node->childs[c] = pool.allocate<KMeansNodeSt>();
00870 node->childs[c]->radius = radiuses[c];
00871 node->childs[c]->pivot = centers[c];
00872 node->childs[c]->variance = (float)variance;
00873 node->childs[c]->mean_radius = (float)mean_radius;
00874 node->childs[c]->indices = NULL;
00875 computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
00876 start=end;
00877 }
00878
00879 delete[] dcenters.data;
00880 delete[] centers;
00881 delete[] radiuses;
00882 delete[] count;
00883 delete[] belongs_to;
00884 }
00885
00886
00887
00901 void findNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, int& checks, int maxChecks,
00902 Heap<BranchSt>* heap)
00903 {
00904
00905 {
00906 DIST_TYPE bsq = (DIST_TYPE)flann_dist(vec, vec+veclen_, node->pivot);
00907 DIST_TYPE rsq = node->radius;
00908 DIST_TYPE wsq = result.worstDist();
00909
00910 DIST_TYPE val = bsq-rsq-wsq;
00911 DIST_TYPE val2 = val*val-4*rsq*wsq;
00912
00913
00914 if (val>0 && val2>0) {
00915 return;
00916 }
00917 }
00918
00919 if (node->childs==NULL) {
00920 if (checks>=maxChecks) {
00921 if (result.full()) return;
00922 }
00923 checks += node->size;
00924 for (int i=0;i<node->size;++i) {
00925 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00926 }
00927 }
00928 else {
00929 float* domain_distances = new float[branching];
00930 int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
00931 delete[] domain_distances;
00932 findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
00933 }
00934 }
00935
00944 int exploreNodeBranches(KMeansNode node, const ELEM_TYPE* q, float* domain_distances, Heap<BranchSt>* heap)
00945 {
00946
00947 int best_index = 0;
00948 domain_distances[best_index] = (float)flann_dist(q,q+veclen_,node->childs[best_index]->pivot);
00949 for (int i=1;i<branching;++i) {
00950 domain_distances[i] = (float)flann_dist(q,q+veclen_,node->childs[i]->pivot);
00951 if (domain_distances[i]<domain_distances[best_index]) {
00952 best_index = i;
00953 }
00954 }
00955
00956
00957 for (int i=0;i<branching;++i) {
00958 if (i != best_index) {
00959 domain_distances[i] -= cb_index*node->childs[i]->variance;
00960
00961
00962
00963
00964
00965 heap->insert(BranchSt::make_branch(node->childs[i],domain_distances[i]));
00966 }
00967 }
00968
00969 return best_index;
00970 }
00971
00972
00976 void findExactNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec)
00977 {
00978
00979 {
00980 float bsq = (float)flann_dist(vec, vec+veclen_, node->pivot);
00981 float rsq = node->radius;
00982 float wsq = result.worstDist();
00983
00984 float val = bsq-rsq-wsq;
00985 float val2 = val*val-4*rsq*wsq;
00986
00987
00988 if (val>0 && val2>0) {
00989 return;
00990 }
00991 }
00992
00993
00994 if (node->childs==NULL) {
00995 for (int i=0;i<node->size;++i) {
00996 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00997 }
00998 }
00999 else {
01000 int* sort_indices = new int[branching];
01001
01002 getCenterOrdering(node, vec, sort_indices);
01003
01004 for (int i=0; i<branching; ++i) {
01005 findExactNN(node->childs[sort_indices[i]],result,vec);
01006 }
01007
01008 delete[] sort_indices;
01009 }
01010 }
01011
01012
01018 void getCenterOrdering(KMeansNode node, const ELEM_TYPE* q, int* sort_indices)
01019 {
01020 float* domain_distances = new float[branching];
01021 for (int i=0;i<branching;++i) {
01022 float dist = (float)flann_dist(q, q+veclen_, node->childs[i]->pivot);
01023
01024 int j=0;
01025 while (domain_distances[j]<dist && j<i) j++;
01026 for (int k=i;k>j;--k) {
01027 domain_distances[k] = domain_distances[k-1];
01028 sort_indices[k] = sort_indices[k-1];
01029 }
01030 domain_distances[j] = dist;
01031 sort_indices[j] = i;
01032 }
01033 delete[] domain_distances;
01034 }
01035
01041 float getDistanceToBorder(float* p, float* c, float* q)
01042 {
01043 float sum = 0;
01044 float sum2 = 0;
01045
01046 for (int i=0;i<veclen_; ++i) {
01047 float t = c[i]-p[i];
01048 sum += t*(q[i]-(c[i]+p[i])/2);
01049 sum2 += t*t;
01050 }
01051
01052 return sum*sum/sum2;
01053 }
01054
01055
01065 int getMinVarianceClusters(KMeansNode root, KMeansNode* clusters, int clusters_length, float& varianceValue)
01066 {
01067 int clusterCount = 1;
01068 clusters[0] = root;
01069
01070 float meanVariance = root->variance*root->size;
01071
01072 while (clusterCount<clusters_length) {
01073 float minVariance = (std::numeric_limits<float>::max)();
01074 int splitIndex = -1;
01075
01076 for (int i=0;i<clusterCount;++i) {
01077 if (clusters[i]->childs != NULL) {
01078
01079 float variance = meanVariance - clusters[i]->variance*clusters[i]->size;
01080
01081 for (int j=0;j<branching;++j) {
01082 variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
01083 }
01084 if (variance<minVariance) {
01085 minVariance = variance;
01086 splitIndex = i;
01087 }
01088 }
01089 }
01090
01091 if (splitIndex==-1) break;
01092 if ( (branching+clusterCount-1) > clusters_length) break;
01093
01094 meanVariance = minVariance;
01095
01096
01097 KMeansNode toSplit = clusters[splitIndex];
01098 clusters[splitIndex] = toSplit->childs[0];
01099 for (int i=1;i<branching;++i) {
01100 clusters[clusterCount++] = toSplit->childs[i];
01101 }
01102 }
01103
01104 varianceValue = meanVariance/root->size;
01105 return clusterCount;
01106 }
01107 };
01108
01109
01110
01111
01112
01113 }
01114
01115 #endif //_OPENCV_KMEANSTREE_H_