00001 /*********************************************************************** 00002 * Software License Agreement (BSD License) 00003 * 00004 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 00005 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 00006 * 00007 * THE BSD LICENSE 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 00013 * 1. Redistributions of source code must retain the above copyright 00014 * notice, this list of conditions and the following disclaimer. 00015 * 2. Redistributions in binary form must reproduce the above copyright 00016 * notice, this list of conditions and the following disclaimer in the 00017 * documentation and/or other materials provided with the distribution. 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00020 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00021 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00022 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00023 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00024 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00025 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00026 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00028 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 *************************************************************************/ 00030 00031 #ifndef _OPENCV_KMEANSTREE_H_ 00032 #define _OPENCV_KMEANSTREE_H_ 00033 00034 #include <algorithm> 00035 #include <string> 00036 #include <map> 00037 #include <cassert> 00038 #include <limits> 00039 #include <cmath> 00040 00041 #include "opencv2/flann/general.h" 00042 #include "opencv2/flann/nn_index.h" 00043 #include "opencv2/flann/matrix.h" 00044 #include "opencv2/flann/result_set.h" 00045 #include "opencv2/flann/heap.h" 00046 #include "opencv2/flann/allocator.h" 00047 #include "opencv2/flann/random.h" 00048 00049 00050 namespace cvflann 00051 { 00052 00053 struct CV_EXPORTS KMeansIndexParams : public IndexParams { 00054 KMeansIndexParams(int branching_ = 32, int iterations_ = 11, 00055 flann_centers_init_t centers_init_ = FLANN_CENTERS_RANDOM, float cb_index_ = 0.2 ) : 00056 IndexParams(FLANN_INDEX_KMEANS), 00057 branching(branching_), 00058 iterations(iterations_), 00059 centers_init(centers_init_), 00060 cb_index(cb_index_) {}; 00061 00062 int branching; // branching factor (for kmeans tree) 00063 int iterations; // max iterations to perform in one kmeans clustering (kmeans tree) 00064 flann_centers_init_t centers_init; // algorithm used for picking the initial cluster centers for kmeans tree 00065 float cb_index; // cluster boundary index. Used when searching the kmeans tree 00066 00067 void print() const 00068 { 00069 logger().info("Index type: %d\n",(int)algorithm); 00070 logger().info("Branching: %d\n", branching); 00071 logger().info("Iterations: %d\n", iterations); 00072 logger().info("Centres initialisation: %d\n", centers_init); 00073 logger().info("Cluster boundary weight: %g\n", cb_index); 00074 } 00075 00076 }; 00077 00078 00085 template <typename ELEM_TYPE, typename DIST_TYPE = typename DistType<ELEM_TYPE>::type > 00086 class KMeansIndex : public NNIndex<ELEM_TYPE> 00087 { 00088 00092 int branching; 00093 00098 int max_iter; 00099 00106 float cb_index; 00107 00111 const Matrix<ELEM_TYPE> dataset; 00112 00113 const IndexParams& index_params; 00114 00118 size_t size_; 00119 00123 size_t veclen_; 00124 00125 00129 struct KMeansNodeSt { 00133 DIST_TYPE* pivot; 00137 DIST_TYPE radius; 00141 DIST_TYPE mean_radius; 00145 DIST_TYPE variance; 00149 int size; 00153 KMeansNodeSt** childs; 00157 int* indices; 00161 int level; 00162 }; 00163 typedef KMeansNodeSt* KMeansNode; 00164 00165 00166 00170 typedef BranchStruct<KMeansNode> BranchSt; 00171 00172 00176 KMeansNode root; 00177 00181 int* indices; 00182 00183 00191 PooledAllocator pool; 00192 00196 int memoryCounter; 00197 00198 00199 typedef void (KMeansIndex::*centersAlgFunction)(int, int*, int, int*, int&); 00200 00204 centersAlgFunction chooseCenters; 00205 00206 00207 00218 void chooseCentersRandom(int k, int* indices, int indices_length, int* centers, int& centers_length) 00219 { 00220 UniqueRandom r(indices_length); 00221 00222 int index; 00223 for (index=0;index<k;++index) { 00224 bool duplicate = true; 00225 int rnd; 00226 while (duplicate) { 00227 duplicate = false; 00228 rnd = r.next(); 00229 if (rnd<0) { 00230 centers_length = index; 00231 return; 00232 } 00233 00234 centers[index] = indices[rnd]; 00235 00236 for (int j=0;j<index;++j) { 00237 float sq = (float)flann_dist(dataset[centers[index]],dataset[centers[index]]+dataset.cols,dataset[centers[j]]); 00238 if (sq<1e-16) { 00239 duplicate = true; 00240 } 00241 } 00242 } 00243 } 00244 00245 centers_length = index; 00246 } 00247 00248 00259 void chooseCentersGonzales(int k, int* indices, int indices_length, int* centers, int& centers_length) 00260 { 00261 int n = indices_length; 00262 00263 int rnd = rand_int(n); 00264 assert(rnd >=0 && rnd < n); 00265 00266 centers[0] = indices[rnd]; 00267 00268 int index; 00269 for (index=1; index<k; ++index) { 00270 00271 int best_index = -1; 00272 float best_val = 0; 00273 for (int j=0;j<n;++j) { 00274 float dist = (float)flann_dist(dataset[centers[0]],dataset[centers[0]]+dataset.cols,dataset[indices[j]]); 00275 for (int i=1;i<index;++i) { 00276 float tmp_dist = (float)flann_dist(dataset[centers[i]],dataset[centers[i]]+dataset.cols,dataset[indices[j]]); 00277 if (tmp_dist<dist) { 00278 dist = tmp_dist; 00279 } 00280 } 00281 if (dist>best_val) { 00282 best_val = dist; 00283 best_index = j; 00284 } 00285 } 00286 if (best_index!=-1) { 00287 centers[index] = indices[best_index]; 00288 } 00289 else { 00290 break; 00291 } 00292 } 00293 centers_length = index; 00294 } 00295 00296 00310 void chooseCentersKMeanspp(int k, int* indices, int indices_length, int* centers, int& centers_length) 00311 { 00312 int n = indices_length; 00313 00314 double currentPot = 0; 00315 double* closestDistSq = new double[n]; 00316 00317 // Choose one random center and set the closestDistSq values 00318 int index = rand_int(n); 00319 assert(index >=0 && index < n); 00320 centers[0] = indices[index]; 00321 00322 for (int i = 0; i < n; i++) { 00323 closestDistSq[i] = flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]); 00324 currentPot += closestDistSq[i]; 00325 } 00326 00327 00328 const int numLocalTries = 1; 00329 00330 // Choose each center 00331 int centerCount; 00332 for (centerCount = 1; centerCount < k; centerCount++) { 00333 00334 // Repeat several trials 00335 double bestNewPot = -1; 00336 int bestNewIndex = -1; 00337 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) { 00338 00339 // Choose our center - have to be slightly careful to return a valid answer even accounting 00340 // for possible rounding errors 00341 double randVal = rand_double(currentPot); 00342 for (index = 0; index < n-1; index++) { 00343 if (randVal <= closestDistSq[index]) 00344 break; 00345 else 00346 randVal -= closestDistSq[index]; 00347 } 00348 00349 // Compute the new potential 00350 double newPot = 0; 00351 for (int i = 0; i < n; i++) 00352 newPot += std::min( flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]), closestDistSq[i] ); 00353 00354 // Store the best result 00355 if (bestNewPot < 0 || newPot < bestNewPot) { 00356 bestNewPot = newPot; 00357 bestNewIndex = index; 00358 } 00359 } 00360 00361 // Add the appropriate center 00362 centers[centerCount] = indices[bestNewIndex]; 00363 currentPot = bestNewPot; 00364 for (int i = 0; i < n; i++) 00365 closestDistSq[i] = std::min( flann_dist(dataset[indices[i]], dataset[indices[i]]+dataset.cols, dataset[indices[bestNewIndex]]), closestDistSq[i] ); 00366 } 00367 00368 centers_length = centerCount; 00369 00370 delete[] closestDistSq; 00371 } 00372 00373 00374 00375 public: 00376 00377 00378 flann_algorithm_t getType() const 00379 { 00380 return FLANN_INDEX_KMEANS; 00381 } 00382 00390 KMeansIndex(const Matrix<ELEM_TYPE>& inputData, const KMeansIndexParams& params = KMeansIndexParams() ) 00391 : dataset(inputData), index_params(params), root(NULL), indices(NULL) 00392 { 00393 memoryCounter = 0; 00394 00395 size_ = dataset.rows; 00396 veclen_ = dataset.cols; 00397 00398 branching = params.branching; 00399 max_iter = params.iterations; 00400 if (max_iter<0) { 00401 max_iter = (std::numeric_limits<int>::max)(); 00402 } 00403 flann_centers_init_t centersInit = params.centers_init; 00404 00405 if (centersInit==FLANN_CENTERS_RANDOM) { 00406 chooseCenters = &KMeansIndex::chooseCentersRandom; 00407 } 00408 else if (centersInit==FLANN_CENTERS_GONZALES) { 00409 chooseCenters = &KMeansIndex::chooseCentersGonzales; 00410 } 00411 else if (centersInit==FLANN_CENTERS_KMEANSPP) { 00412 chooseCenters = &KMeansIndex::chooseCentersKMeanspp; 00413 } 00414 else { 00415 throw FLANNException("Unknown algorithm for choosing initial centers."); 00416 } 00417 cb_index = 0.4f; 00418 00419 } 00420 00421 00427 virtual ~KMeansIndex() 00428 { 00429 if (root != NULL) { 00430 free_centers(root); 00431 } 00432 if (indices!=NULL) { 00433 delete[] indices; 00434 } 00435 } 00436 00440 size_t size() const 00441 { 00442 return size_; 00443 } 00444 00448 size_t veclen() const 00449 { 00450 return veclen_; 00451 } 00452 00453 00454 void set_cb_index( float index) 00455 { 00456 cb_index = index; 00457 } 00458 00459 00464 int usedMemory() const 00465 { 00466 return pool.usedMemory+pool.wastedMemory+memoryCounter; 00467 } 00468 00472 void buildIndex() 00473 { 00474 if (branching<2) { 00475 throw FLANNException("Branching factor must be at least 2"); 00476 } 00477 00478 indices = new int[size_]; 00479 for (size_t i=0;i<size_;++i) { 00480 indices[i] = (int)i; 00481 } 00482 00483 root = pool.allocate<KMeansNodeSt>(); 00484 computeNodeStatistics(root, indices, (int)size_); 00485 computeClustering(root, indices, (int)size_, branching,0); 00486 } 00487 00488 00489 void saveIndex(FILE* stream) 00490 { 00491 save_value(stream, branching); 00492 save_value(stream, max_iter); 00493 save_value(stream, memoryCounter); 00494 save_value(stream, cb_index); 00495 save_value(stream, *indices, (int)size_); 00496 00497 save_tree(stream, root); 00498 } 00499 00500 00501 void loadIndex(FILE* stream) 00502 { 00503 load_value(stream, branching); 00504 load_value(stream, max_iter); 00505 load_value(stream, memoryCounter); 00506 load_value(stream, cb_index); 00507 if (indices!=NULL) { 00508 delete[] indices; 00509 } 00510 indices = new int[size_]; 00511 load_value(stream, *indices, (int)size_); 00512 00513 if (root!=NULL) { 00514 free_centers(root); 00515 } 00516 load_tree(stream, root); 00517 } 00518 00519 00529 void findNeighbors(ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, const SearchParams& searchParams) 00530 { 00531 00532 int maxChecks = searchParams.checks; 00533 00534 if (maxChecks<0) { 00535 findExactNN(root, result, vec); 00536 } 00537 else { 00538 // Priority queue storing intermediate branches in the best-bin-first search 00539 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_); 00540 00541 int checks = 0; 00542 00543 findNN(root, result, vec, checks, maxChecks, heap); 00544 00545 BranchSt branch; 00546 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) { 00547 KMeansNode node = branch.node; 00548 findNN(node, result, vec, checks, maxChecks, heap); 00549 } 00550 assert(result.full()); 00551 00552 delete heap; 00553 } 00554 00555 } 00556 00557 00565 int getClusterCenters(Matrix<DIST_TYPE>& centers) 00566 { 00567 int numClusters = centers.rows; 00568 if (numClusters<1) { 00569 throw FLANNException("Number of clusters must be at least 1"); 00570 } 00571 00572 float variance; 00573 KMeansNode* clusters = new KMeansNode[numClusters]; 00574 00575 int clusterCount = getMinVarianceClusters(root, clusters, numClusters, variance); 00576 00577 // logger().info("Clusters requested: %d, returning %d\n",numClusters, clusterCount); 00578 00579 00580 for (int i=0;i<clusterCount;++i) { 00581 DIST_TYPE* center = clusters[i]->pivot; 00582 for (size_t j=0;j<veclen_;++j) { 00583 centers[i][j] = center[j]; 00584 } 00585 } 00586 delete[] clusters; 00587 00588 return clusterCount; 00589 } 00590 00591 const IndexParams* getParameters() const 00592 { 00593 return &index_params; 00594 } 00595 00596 00597 private: 00598 00599 KMeansIndex& operator=(const KMeansIndex&); 00600 KMeansIndex(const KMeansIndex&); 00601 00602 void save_tree(FILE* stream, KMeansNode node) 00603 { 00604 save_value(stream, *node); 00605 save_value(stream, *(node->pivot), (int)veclen_); 00606 if (node->childs==NULL) { 00607 int indices_offset = (int)(node->indices - indices); 00608 save_value(stream, indices_offset); 00609 } 00610 else { 00611 for(int i=0; i<branching; ++i) { 00612 save_tree(stream, node->childs[i]); 00613 } 00614 } 00615 } 00616 00617 00618 void load_tree(FILE* stream, KMeansNode& node) 00619 { 00620 node = pool.allocate<KMeansNodeSt>(); 00621 load_value(stream, *node); 00622 node->pivot = new DIST_TYPE[veclen_]; 00623 load_value(stream, *(node->pivot), (int)veclen_); 00624 if (node->childs==NULL) { 00625 int indices_offset; 00626 load_value(stream, indices_offset); 00627 node->indices = indices + indices_offset; 00628 } 00629 else { 00630 node->childs = pool.allocate<KMeansNode>(branching); 00631 for(int i=0; i<branching; ++i) { 00632 load_tree(stream, node->childs[i]); 00633 } 00634 } 00635 } 00636 00637 00641 void free_centers(KMeansNode node) 00642 { 00643 delete[] node->pivot; 00644 if (node->childs!=NULL) { 00645 for (int k=0;k<branching;++k) { 00646 free_centers(node->childs[k]); 00647 } 00648 } 00649 } 00650 00658 void computeNodeStatistics(KMeansNode node, int* indices, int indices_length) { 00659 00660 double radius = 0; 00661 double variance = 0; 00662 DIST_TYPE* mean = new DIST_TYPE[veclen_]; 00663 memoryCounter += (int)(veclen_*sizeof(DIST_TYPE)); 00664 00665 memset(mean,0,veclen_*sizeof(float)); 00666 00667 for (size_t i=0;i<size_;++i) { 00668 ELEM_TYPE* vec = dataset[indices[i]]; 00669 for (size_t j=0;j<veclen_;++j) { 00670 mean[j] += vec[j]; 00671 } 00672 variance += flann_dist(vec,vec+veclen_,zero()); 00673 } 00674 for (size_t j=0;j<veclen_;++j) { 00675 mean[j] /= size_; 00676 } 00677 variance /= size_; 00678 variance -= flann_dist(mean,mean+veclen_,zero()); 00679 00680 double tmp = 0; 00681 for (int i=0;i<indices_length;++i) { 00682 tmp = flann_dist(mean, mean + veclen_, dataset[indices[i]]); 00683 if (tmp>radius) { 00684 radius = tmp; 00685 } 00686 } 00687 00688 node->variance = (DIST_TYPE)variance; 00689 node->radius = (DIST_TYPE)radius; 00690 node->pivot = mean; 00691 } 00692 00693 00705 void computeClustering(KMeansNode node, int* indices, int indices_length, int branching, int level) 00706 { 00707 node->size = indices_length; 00708 node->level = level; 00709 00710 if (indices_length < branching) { 00711 node->indices = indices; 00712 std::sort(node->indices,node->indices+indices_length); 00713 node->childs = NULL; 00714 return; 00715 } 00716 00717 int* centers_idx = new int[branching]; 00718 int centers_length; 00719 (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length); 00720 00721 if (centers_length<branching) { 00722 node->indices = indices; 00723 std::sort(node->indices,node->indices+indices_length); 00724 node->childs = NULL; 00725 return; 00726 } 00727 00728 00729 Matrix<double> dcenters(new double[branching*veclen_],branching,(long)veclen_); 00730 for (int i=0; i<centers_length; ++i) { 00731 ELEM_TYPE* vec = dataset[centers_idx[i]]; 00732 for (size_t k=0; k<veclen_; ++k) { 00733 dcenters[i][k] = double(vec[k]); 00734 } 00735 } 00736 delete[] centers_idx; 00737 00738 float* radiuses = new float[branching]; 00739 int* count = new int[branching]; 00740 for (int i=0;i<branching;++i) { 00741 radiuses[i] = 0; 00742 count[i] = 0; 00743 } 00744 00745 // assign points to clusters 00746 int* belongs_to = new int[indices_length]; 00747 for (int i=0;i<indices_length;++i) { 00748 00749 double sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]] + veclen_ ,dcenters[0]); 00750 belongs_to[i] = 0; 00751 for (int j=1;j<branching;++j) { 00752 double new_sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_, dcenters[j]); 00753 if (sq_dist>new_sq_dist) { 00754 belongs_to[i] = j; 00755 sq_dist = new_sq_dist; 00756 } 00757 } 00758 if (sq_dist>radiuses[belongs_to[i]]) { 00759 radiuses[belongs_to[i]] = (float)sq_dist; 00760 } 00761 count[belongs_to[i]]++; 00762 } 00763 00764 bool converged = false; 00765 int iteration = 0; 00766 while (!converged && iteration<max_iter) { 00767 converged = true; 00768 iteration++; 00769 00770 // compute the new cluster centers 00771 for (int i=0;i<branching;++i) { 00772 memset(dcenters[i],0,sizeof(double)*veclen_); 00773 radiuses[i] = 0; 00774 } 00775 for (int i=0;i<indices_length;++i) { 00776 ELEM_TYPE* vec = dataset[indices[i]]; 00777 double* center = dcenters[belongs_to[i]]; 00778 for (size_t k=0;k<veclen_;++k) { 00779 center[k] += vec[k]; 00780 } 00781 } 00782 for (int i=0;i<branching;++i) { 00783 int cnt = count[i]; 00784 for (size_t k=0;k<veclen_;++k) { 00785 dcenters[i][k] /= cnt; 00786 } 00787 } 00788 00789 // reassign points to clusters 00790 for (int i=0;i<indices_length;++i) { 00791 float sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_ ,dcenters[0]); 00792 int new_centroid = 0; 00793 for (int j=1;j<branching;++j) { 00794 float new_sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_,dcenters[j]); 00795 if (sq_dist>new_sq_dist) { 00796 new_centroid = j; 00797 sq_dist = new_sq_dist; 00798 } 00799 } 00800 if (sq_dist>radiuses[new_centroid]) { 00801 radiuses[new_centroid] = sq_dist; 00802 } 00803 if (new_centroid != belongs_to[i]) { 00804 count[belongs_to[i]]--; 00805 count[new_centroid]++; 00806 belongs_to[i] = new_centroid; 00807 00808 converged = false; 00809 } 00810 } 00811 00812 for (int i=0;i<branching;++i) { 00813 // if one cluster converges to an empty cluster, 00814 // move an element into that cluster 00815 if (count[i]==0) { 00816 int j = (i+1)%branching; 00817 while (count[j]<=1) { 00818 j = (j+1)%branching; 00819 } 00820 00821 for (int k=0;k<indices_length;++k) { 00822 if (belongs_to[k]==j) { 00823 belongs_to[k] = i; 00824 count[j]--; 00825 count[i]++; 00826 break; 00827 } 00828 } 00829 converged = false; 00830 } 00831 } 00832 00833 } 00834 00835 DIST_TYPE** centers = new DIST_TYPE*[branching]; 00836 00837 for (int i=0; i<branching; ++i) { 00838 centers[i] = new DIST_TYPE[veclen_]; 00839 memoryCounter += (int)(veclen_*sizeof(DIST_TYPE)); 00840 for (size_t k=0; k<veclen_; ++k) { 00841 centers[i][k] = (DIST_TYPE)dcenters[i][k]; 00842 } 00843 } 00844 00845 00846 // compute kmeans clustering for each of the resulting clusters 00847 node->childs = pool.allocate<KMeansNode>(branching); 00848 int start = 0; 00849 int end = start; 00850 for (int c=0;c<branching;++c) { 00851 int s = count[c]; 00852 00853 double variance = 0; 00854 double mean_radius =0; 00855 for (int i=0;i<indices_length;++i) { 00856 if (belongs_to[i]==c) { 00857 double d = flann_dist(dataset[indices[i]],dataset[indices[i]]+veclen_,zero()); 00858 variance += d; 00859 mean_radius += sqrt(d); 00860 std::swap(indices[i],indices[end]); 00861 std::swap(belongs_to[i],belongs_to[end]); 00862 end++; 00863 } 00864 } 00865 variance /= s; 00866 mean_radius /= s; 00867 variance -= flann_dist(centers[c],centers[c]+veclen_,zero()); 00868 00869 node->childs[c] = pool.allocate<KMeansNodeSt>(); 00870 node->childs[c]->radius = radiuses[c]; 00871 node->childs[c]->pivot = centers[c]; 00872 node->childs[c]->variance = (float)variance; 00873 node->childs[c]->mean_radius = (float)mean_radius; 00874 node->childs[c]->indices = NULL; 00875 computeClustering(node->childs[c],indices+start, end-start, branching, level+1); 00876 start=end; 00877 } 00878 00879 delete[] dcenters.data; 00880 delete[] centers; 00881 delete[] radiuses; 00882 delete[] count; 00883 delete[] belongs_to; 00884 } 00885 00886 00887 00901 void findNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, int& checks, int maxChecks, 00902 Heap<BranchSt>* heap) 00903 { 00904 // Ignore those clusters that are too far away 00905 { 00906 DIST_TYPE bsq = (DIST_TYPE)flann_dist(vec, vec+veclen_, node->pivot); 00907 DIST_TYPE rsq = node->radius; 00908 DIST_TYPE wsq = result.worstDist(); 00909 00910 DIST_TYPE val = bsq-rsq-wsq; 00911 DIST_TYPE val2 = val*val-4*rsq*wsq; 00912 00913 //if (val>0) { 00914 if (val>0 && val2>0) { 00915 return; 00916 } 00917 } 00918 00919 if (node->childs==NULL) { 00920 if (checks>=maxChecks) { 00921 if (result.full()) return; 00922 } 00923 checks += node->size; 00924 for (int i=0;i<node->size;++i) { 00925 result.addPoint(dataset[node->indices[i]], node->indices[i]); 00926 } 00927 } 00928 else { 00929 float* domain_distances = new float[branching]; 00930 int closest_center = exploreNodeBranches(node, vec, domain_distances, heap); 00931 delete[] domain_distances; 00932 findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap); 00933 } 00934 } 00935 00944 int exploreNodeBranches(KMeansNode node, const ELEM_TYPE* q, float* domain_distances, Heap<BranchSt>* heap) 00945 { 00946 00947 int best_index = 0; 00948 domain_distances[best_index] = (float)flann_dist(q,q+veclen_,node->childs[best_index]->pivot); 00949 for (int i=1;i<branching;++i) { 00950 domain_distances[i] = (float)flann_dist(q,q+veclen_,node->childs[i]->pivot); 00951 if (domain_distances[i]<domain_distances[best_index]) { 00952 best_index = i; 00953 } 00954 } 00955 00956 // float* best_center = node->childs[best_index]->pivot; 00957 for (int i=0;i<branching;++i) { 00958 if (i != best_index) { 00959 domain_distances[i] -= cb_index*node->childs[i]->variance; 00960 00961 // float dist_to_border = getDistanceToBorder(node.childs[i].pivot,best_center,q); 00962 // if (domain_distances[i]<dist_to_border) { 00963 // domain_distances[i] = dist_to_border; 00964 // } 00965 heap->insert(BranchSt::make_branch(node->childs[i],domain_distances[i])); 00966 } 00967 } 00968 00969 return best_index; 00970 } 00971 00972 00976 void findExactNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec) 00977 { 00978 // Ignore those clusters that are too far away 00979 { 00980 float bsq = (float)flann_dist(vec, vec+veclen_, node->pivot); 00981 float rsq = node->radius; 00982 float wsq = result.worstDist(); 00983 00984 float val = bsq-rsq-wsq; 00985 float val2 = val*val-4*rsq*wsq; 00986 00987 // if (val>0) { 00988 if (val>0 && val2>0) { 00989 return; 00990 } 00991 } 00992 00993 00994 if (node->childs==NULL) { 00995 for (int i=0;i<node->size;++i) { 00996 result.addPoint(dataset[node->indices[i]], node->indices[i]); 00997 } 00998 } 00999 else { 01000 int* sort_indices = new int[branching]; 01001 01002 getCenterOrdering(node, vec, sort_indices); 01003 01004 for (int i=0; i<branching; ++i) { 01005 findExactNN(node->childs[sort_indices[i]],result,vec); 01006 } 01007 01008 delete[] sort_indices; 01009 } 01010 } 01011 01012 01018 void getCenterOrdering(KMeansNode node, const ELEM_TYPE* q, int* sort_indices) 01019 { 01020 float* domain_distances = new float[branching]; 01021 for (int i=0;i<branching;++i) { 01022 float dist = (float)flann_dist(q, q+veclen_, node->childs[i]->pivot); 01023 01024 int j=0; 01025 while (domain_distances[j]<dist && j<i) j++; 01026 for (int k=i;k>j;--k) { 01027 domain_distances[k] = domain_distances[k-1]; 01028 sort_indices[k] = sort_indices[k-1]; 01029 } 01030 domain_distances[j] = dist; 01031 sort_indices[j] = i; 01032 } 01033 delete[] domain_distances; 01034 } 01035 01041 float getDistanceToBorder(float* p, float* c, float* q) 01042 { 01043 float sum = 0; 01044 float sum2 = 0; 01045 01046 for (int i=0;i<veclen_; ++i) { 01047 float t = c[i]-p[i]; 01048 sum += t*(q[i]-(c[i]+p[i])/2); 01049 sum2 += t*t; 01050 } 01051 01052 return sum*sum/sum2; 01053 } 01054 01055 01065 int getMinVarianceClusters(KMeansNode root, KMeansNode* clusters, int clusters_length, float& varianceValue) 01066 { 01067 int clusterCount = 1; 01068 clusters[0] = root; 01069 01070 float meanVariance = root->variance*root->size; 01071 01072 while (clusterCount<clusters_length) { 01073 float minVariance = (std::numeric_limits<float>::max)(); 01074 int splitIndex = -1; 01075 01076 for (int i=0;i<clusterCount;++i) { 01077 if (clusters[i]->childs != NULL) { 01078 01079 float variance = meanVariance - clusters[i]->variance*clusters[i]->size; 01080 01081 for (int j=0;j<branching;++j) { 01082 variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size; 01083 } 01084 if (variance<minVariance) { 01085 minVariance = variance; 01086 splitIndex = i; 01087 } 01088 } 01089 } 01090 01091 if (splitIndex==-1) break; 01092 if ( (branching+clusterCount-1) > clusters_length) break; 01093 01094 meanVariance = minVariance; 01095 01096 // split node 01097 KMeansNode toSplit = clusters[splitIndex]; 01098 clusters[splitIndex] = toSplit->childs[0]; 01099 for (int i=1;i<branching;++i) { 01100 clusters[clusterCount++] = toSplit->childs[i]; 01101 } 01102 } 01103 01104 varianceValue = meanVariance/root->size; 01105 return clusterCount; 01106 } 01107 }; 01108 01109 01110 01111 //register_index(KMEANS,KMeansTree) 01112 01113 } // namespace cvflann 01114 01115 #endif //_OPENCV_KMEANSTREE_H_