Cinder

  • Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

include/opencv2/flann/kmeans_index.h

Go to the documentation of this file.
00001 /***********************************************************************
00002  * Software License Agreement (BSD License)
00003  *
00004  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
00005  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
00006  *
00007  * THE BSD LICENSE
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  *
00013  * 1. Redistributions of source code must retain the above copyright
00014  *    notice, this list of conditions and the following disclaimer.
00015  * 2. Redistributions in binary form must reproduce the above copyright
00016  *    notice, this list of conditions and the following disclaimer in the
00017  *    documentation and/or other materials provided with the distribution.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00020  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00021  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00022  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00023  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00024  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00028  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  *************************************************************************/
00030 
00031 #ifndef _OPENCV_KMEANSTREE_H_
00032 #define _OPENCV_KMEANSTREE_H_
00033 
00034 #include <algorithm>
00035 #include <string>
00036 #include <map>
00037 #include <cassert>
00038 #include <limits>
00039 #include <cmath>
00040 
00041 #include "opencv2/flann/general.h"
00042 #include "opencv2/flann/nn_index.h"
00043 #include "opencv2/flann/matrix.h"
00044 #include "opencv2/flann/result_set.h"
00045 #include "opencv2/flann/heap.h"
00046 #include "opencv2/flann/allocator.h"
00047 #include "opencv2/flann/random.h"
00048 
00049 
00050 namespace cvflann
00051 {
00052 
00053 struct CV_EXPORTS KMeansIndexParams : public IndexParams {
00054     KMeansIndexParams(int branching_ = 32, int iterations_ = 11,
00055             flann_centers_init_t centers_init_ = FLANN_CENTERS_RANDOM, float cb_index_ = 0.2 ) :
00056         IndexParams(FLANN_INDEX_KMEANS),
00057         branching(branching_),
00058         iterations(iterations_),
00059         centers_init(centers_init_),
00060         cb_index(cb_index_) {};
00061 
00062     int branching;             // branching factor (for kmeans tree)
00063     int iterations;            // max iterations to perform in one kmeans clustering (kmeans tree)
00064     flann_centers_init_t centers_init;          // algorithm used for picking the initial cluster centers for kmeans tree
00065     float cb_index;            // cluster boundary index. Used when searching the kmeans tree
00066 
00067     void print() const
00068     {
00069         logger().info("Index type: %d\n",(int)algorithm);
00070         logger().info("Branching: %d\n", branching);
00071         logger().info("Iterations: %d\n", iterations);
00072         logger().info("Centres initialisation: %d\n", centers_init);
00073         logger().info("Cluster boundary weight: %g\n", cb_index);
00074     }
00075 
00076 };
00077 
00078 
00085 template <typename ELEM_TYPE, typename DIST_TYPE = typename DistType<ELEM_TYPE>::type >
00086 class KMeansIndex : public NNIndex<ELEM_TYPE>
00087 {
00088 
00092     int branching;
00093 
00098     int max_iter;
00099 
00106     float cb_index;
00107 
00111     const Matrix<ELEM_TYPE> dataset;
00112 
00113     const IndexParams& index_params;
00114 
00118     size_t size_;
00119 
00123     size_t veclen_;
00124 
00125 
00129     struct KMeansNodeSt {
00133         DIST_TYPE* pivot;
00137         DIST_TYPE radius;
00141         DIST_TYPE mean_radius;
00145         DIST_TYPE variance;
00149         int size;
00153         KMeansNodeSt** childs;
00157         int* indices;
00161         int level;
00162     };
00163     typedef KMeansNodeSt* KMeansNode;
00164 
00165 
00166 
00170     typedef BranchStruct<KMeansNode> BranchSt;
00171 
00172 
00176     KMeansNode root;
00177 
00181     int* indices;
00182 
00183 
00191     PooledAllocator pool;
00192 
00196     int memoryCounter;
00197 
00198 
00199     typedef void (KMeansIndex::*centersAlgFunction)(int, int*, int, int*, int&);
00200 
00204     centersAlgFunction chooseCenters;
00205 
00206 
00207 
00218     void chooseCentersRandom(int k, int* indices, int indices_length, int* centers, int& centers_length)
00219     {
00220         UniqueRandom r(indices_length);
00221 
00222         int index;
00223         for (index=0;index<k;++index) {
00224             bool duplicate = true;
00225             int rnd;
00226             while (duplicate) {
00227                 duplicate = false;
00228                 rnd = r.next();
00229                 if (rnd<0) {
00230                     centers_length = index;
00231                     return;
00232                 }
00233 
00234                 centers[index] = indices[rnd];
00235 
00236                 for (int j=0;j<index;++j) {
00237                     float sq = (float)flann_dist(dataset[centers[index]],dataset[centers[index]]+dataset.cols,dataset[centers[j]]);
00238                     if (sq<1e-16) {
00239                         duplicate = true;
00240                     }
00241                 }
00242             }
00243         }
00244 
00245         centers_length = index;
00246     }
00247 
00248 
00259     void chooseCentersGonzales(int k, int* indices, int indices_length, int* centers, int& centers_length)
00260     {
00261         int n = indices_length;
00262 
00263         int rnd = rand_int(n);
00264         assert(rnd >=0 && rnd < n);
00265 
00266         centers[0] = indices[rnd];
00267 
00268         int index;
00269         for (index=1; index<k; ++index) {
00270 
00271             int best_index = -1;
00272             float best_val = 0;
00273             for (int j=0;j<n;++j) {
00274                 float dist = (float)flann_dist(dataset[centers[0]],dataset[centers[0]]+dataset.cols,dataset[indices[j]]);
00275                 for (int i=1;i<index;++i) {
00276                         float tmp_dist = (float)flann_dist(dataset[centers[i]],dataset[centers[i]]+dataset.cols,dataset[indices[j]]);
00277                     if (tmp_dist<dist) {
00278                         dist = tmp_dist;
00279                     }
00280                 }
00281                 if (dist>best_val) {
00282                     best_val = dist;
00283                     best_index = j;
00284                 }
00285             }
00286             if (best_index!=-1) {
00287                 centers[index] = indices[best_index];
00288             }
00289             else {
00290                 break;
00291             }
00292         }
00293         centers_length = index;
00294     }
00295 
00296 
00310     void chooseCentersKMeanspp(int k, int* indices, int indices_length, int* centers, int& centers_length)
00311     {
00312         int n = indices_length;
00313 
00314         double currentPot = 0;
00315         double* closestDistSq = new double[n];
00316 
00317         // Choose one random center and set the closestDistSq values
00318         int index = rand_int(n);
00319         assert(index >=0 && index < n);
00320         centers[0] = indices[index];
00321 
00322         for (int i = 0; i < n; i++) {
00323             closestDistSq[i] = flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]);
00324             currentPot += closestDistSq[i];
00325         }
00326 
00327 
00328         const int numLocalTries = 1;
00329 
00330         // Choose each center
00331         int centerCount;
00332         for (centerCount = 1; centerCount < k; centerCount++) {
00333 
00334             // Repeat several trials
00335             double bestNewPot = -1;
00336             int bestNewIndex = -1;
00337             for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
00338 
00339                 // Choose our center - have to be slightly careful to return a valid answer even accounting
00340                 // for possible rounding errors
00341             double randVal = rand_double(currentPot);
00342                 for (index = 0; index < n-1; index++) {
00343                     if (randVal <= closestDistSq[index])
00344                         break;
00345                     else
00346                         randVal -= closestDistSq[index];
00347                 }
00348 
00349                 // Compute the new potential
00350                 double newPot = 0;
00351                 for (int i = 0; i < n; i++)
00352                     newPot += std::min( flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]), closestDistSq[i] );
00353 
00354                 // Store the best result
00355                 if (bestNewPot < 0 || newPot < bestNewPot) {
00356                     bestNewPot = newPot;
00357                     bestNewIndex = index;
00358                 }
00359             }
00360 
00361             // Add the appropriate center
00362             centers[centerCount] = indices[bestNewIndex];
00363             currentPot = bestNewPot;
00364             for (int i = 0; i < n; i++)
00365                 closestDistSq[i] = std::min( flann_dist(dataset[indices[i]], dataset[indices[i]]+dataset.cols, dataset[indices[bestNewIndex]]), closestDistSq[i] );
00366         }
00367 
00368         centers_length = centerCount;
00369 
00370         delete[] closestDistSq;
00371     }
00372 
00373 
00374 
00375 public:
00376 
00377 
00378     flann_algorithm_t getType() const
00379     {
00380         return FLANN_INDEX_KMEANS;
00381     }
00382 
00390     KMeansIndex(const Matrix<ELEM_TYPE>& inputData, const KMeansIndexParams& params = KMeansIndexParams() )
00391         : dataset(inputData), index_params(params), root(NULL), indices(NULL)
00392     {
00393         memoryCounter = 0;
00394 
00395         size_ = dataset.rows;
00396         veclen_ = dataset.cols;
00397 
00398         branching = params.branching;
00399         max_iter = params.iterations;
00400         if (max_iter<0) {
00401             max_iter = (std::numeric_limits<int>::max)();
00402         }
00403         flann_centers_init_t centersInit = params.centers_init;
00404 
00405         if (centersInit==FLANN_CENTERS_RANDOM) {
00406             chooseCenters = &KMeansIndex::chooseCentersRandom;
00407         }
00408         else if (centersInit==FLANN_CENTERS_GONZALES) {
00409             chooseCenters = &KMeansIndex::chooseCentersGonzales;
00410         }
00411         else if (centersInit==FLANN_CENTERS_KMEANSPP) {
00412                     chooseCenters = &KMeansIndex::chooseCentersKMeanspp;
00413         }
00414         else {
00415             throw FLANNException("Unknown algorithm for choosing initial centers.");
00416         }
00417         cb_index = 0.4f;
00418 
00419     }
00420 
00421 
00427     virtual ~KMeansIndex()
00428     {
00429         if (root != NULL) {
00430             free_centers(root);
00431         }
00432         if (indices!=NULL) {
00433           delete[] indices;
00434         }
00435     }
00436 
00440     size_t size() const
00441     {
00442         return size_;
00443     }
00444 
00448     size_t veclen() const
00449     {
00450         return veclen_;
00451     }
00452 
00453 
00454     void set_cb_index( float index)
00455     {
00456         cb_index = index;
00457     }
00458 
00459 
00464     int usedMemory() const
00465     {
00466         return  pool.usedMemory+pool.wastedMemory+memoryCounter;
00467     }
00468 
00472     void buildIndex()
00473     {
00474         if (branching<2) {
00475             throw FLANNException("Branching factor must be at least 2");
00476         }
00477 
00478         indices = new int[size_];
00479         for (size_t i=0;i<size_;++i) {
00480             indices[i] = (int)i;
00481         }
00482 
00483         root = pool.allocate<KMeansNodeSt>();
00484         computeNodeStatistics(root, indices, (int)size_);
00485         computeClustering(root, indices, (int)size_, branching,0);
00486     }
00487 
00488 
00489     void saveIndex(FILE* stream)
00490     {
00491         save_value(stream, branching);
00492         save_value(stream, max_iter);
00493         save_value(stream, memoryCounter);
00494         save_value(stream, cb_index);
00495         save_value(stream, *indices, (int)size_);
00496 
00497         save_tree(stream, root);
00498     }
00499 
00500 
00501     void loadIndex(FILE* stream)
00502     {
00503         load_value(stream, branching);
00504         load_value(stream, max_iter);
00505         load_value(stream, memoryCounter);
00506         load_value(stream, cb_index);
00507         if (indices!=NULL) {
00508             delete[] indices;
00509         }
00510         indices = new int[size_];
00511         load_value(stream, *indices, (int)size_);
00512 
00513         if (root!=NULL) {
00514             free_centers(root);
00515         }
00516         load_tree(stream, root);
00517     }
00518 
00519 
00529     void findNeighbors(ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, const SearchParams& searchParams)
00530     {
00531 
00532         int maxChecks = searchParams.checks;
00533 
00534         if (maxChecks<0) {
00535             findExactNN(root, result, vec);
00536         }
00537         else {
00538              // Priority queue storing intermediate branches in the best-bin-first search
00539             Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
00540 
00541             int checks = 0;
00542 
00543             findNN(root, result, vec, checks, maxChecks, heap);
00544 
00545             BranchSt branch;
00546             while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
00547                 KMeansNode node = branch.node;
00548                 findNN(node, result, vec, checks, maxChecks, heap);
00549             }
00550             assert(result.full());
00551 
00552             delete heap;
00553         }
00554 
00555     }
00556 
00557 
00565     int getClusterCenters(Matrix<DIST_TYPE>& centers)
00566     {
00567         int numClusters = centers.rows;
00568         if (numClusters<1) {
00569             throw FLANNException("Number of clusters must be at least 1");
00570         }
00571 
00572         float variance;
00573         KMeansNode* clusters = new KMeansNode[numClusters];
00574 
00575         int clusterCount = getMinVarianceClusters(root, clusters, numClusters, variance);
00576 
00577 //         logger().info("Clusters requested: %d, returning %d\n",numClusters, clusterCount);
00578 
00579 
00580         for (int i=0;i<clusterCount;++i) {
00581             DIST_TYPE* center = clusters[i]->pivot;
00582             for (size_t j=0;j<veclen_;++j) {
00583                 centers[i][j] = center[j];
00584             }
00585         }
00586         delete[] clusters;
00587 
00588         return clusterCount;
00589     }
00590 
00591     const IndexParams* getParameters() const
00592     {
00593         return &index_params;
00594     }
00595 
00596 
00597 private:
00598 
00599     KMeansIndex& operator=(const KMeansIndex&);
00600     KMeansIndex(const KMeansIndex&);
00601 
00602     void save_tree(FILE* stream, KMeansNode node)
00603     {
00604         save_value(stream, *node);
00605         save_value(stream, *(node->pivot), (int)veclen_);
00606         if (node->childs==NULL) {
00607             int indices_offset = (int)(node->indices - indices);
00608             save_value(stream, indices_offset);
00609         }
00610         else {
00611             for(int i=0; i<branching; ++i) {
00612                 save_tree(stream, node->childs[i]);
00613             }
00614         }
00615     }
00616 
00617 
00618     void load_tree(FILE* stream, KMeansNode& node)
00619     {
00620         node = pool.allocate<KMeansNodeSt>();
00621         load_value(stream, *node);
00622         node->pivot = new DIST_TYPE[veclen_];
00623         load_value(stream, *(node->pivot), (int)veclen_);
00624         if (node->childs==NULL) {
00625             int indices_offset;
00626             load_value(stream, indices_offset);
00627             node->indices = indices + indices_offset;
00628         }
00629         else {
00630             node->childs = pool.allocate<KMeansNode>(branching);
00631             for(int i=0; i<branching; ++i) {
00632                 load_tree(stream, node->childs[i]);
00633             }
00634         }
00635     }
00636 
00637 
00641     void free_centers(KMeansNode node)
00642     {
00643         delete[] node->pivot;
00644         if (node->childs!=NULL) {
00645             for (int k=0;k<branching;++k) {
00646                 free_centers(node->childs[k]);
00647             }
00648         }
00649     }
00650 
00658     void computeNodeStatistics(KMeansNode node, int* indices, int indices_length) {
00659 
00660         double radius = 0;
00661         double variance = 0;
00662         DIST_TYPE* mean = new DIST_TYPE[veclen_];
00663         memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00664 
00665         memset(mean,0,veclen_*sizeof(float));
00666 
00667         for (size_t i=0;i<size_;++i) {
00668             ELEM_TYPE* vec = dataset[indices[i]];
00669             for (size_t j=0;j<veclen_;++j) {
00670                 mean[j] += vec[j];
00671             }
00672             variance += flann_dist(vec,vec+veclen_,zero());
00673         }
00674         for (size_t j=0;j<veclen_;++j) {
00675             mean[j] /= size_;
00676         }
00677         variance /= size_;
00678         variance -= flann_dist(mean,mean+veclen_,zero());
00679 
00680         double tmp = 0;
00681         for (int i=0;i<indices_length;++i) {
00682             tmp = flann_dist(mean, mean + veclen_, dataset[indices[i]]);
00683             if (tmp>radius) {
00684                 radius = tmp;
00685             }
00686         }
00687 
00688         node->variance = (DIST_TYPE)variance;
00689         node->radius = (DIST_TYPE)radius;
00690         node->pivot = mean;
00691     }
00692 
00693 
00705     void computeClustering(KMeansNode node, int* indices, int indices_length, int branching, int level)
00706     {
00707         node->size = indices_length;
00708         node->level = level;
00709 
00710         if (indices_length < branching) {
00711             node->indices = indices;
00712             std::sort(node->indices,node->indices+indices_length);
00713             node->childs = NULL;
00714             return;
00715         }
00716 
00717         int* centers_idx = new int[branching];
00718         int centers_length;
00719         (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
00720 
00721         if (centers_length<branching) {
00722             node->indices = indices;
00723             std::sort(node->indices,node->indices+indices_length);
00724             node->childs = NULL;
00725             return;
00726         }
00727 
00728 
00729         Matrix<double> dcenters(new double[branching*veclen_],branching,(long)veclen_);
00730         for (int i=0; i<centers_length; ++i) {
00731             ELEM_TYPE* vec = dataset[centers_idx[i]];
00732             for (size_t k=0; k<veclen_; ++k) {
00733                 dcenters[i][k] = double(vec[k]);
00734             }
00735         }
00736         delete[] centers_idx;
00737 
00738         float* radiuses = new float[branching];
00739         int* count = new int[branching];
00740         for (int i=0;i<branching;++i) {
00741             radiuses[i] = 0;
00742             count[i] = 0;
00743         }
00744 
00745         //  assign points to clusters
00746         int* belongs_to = new int[indices_length];
00747         for (int i=0;i<indices_length;++i) {
00748 
00749             double sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]] + veclen_ ,dcenters[0]);
00750             belongs_to[i] = 0;
00751             for (int j=1;j<branching;++j) {
00752                 double new_sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_, dcenters[j]);
00753                 if (sq_dist>new_sq_dist) {
00754                     belongs_to[i] = j;
00755                     sq_dist = new_sq_dist;
00756                 }
00757             }
00758             if (sq_dist>radiuses[belongs_to[i]]) {
00759                 radiuses[belongs_to[i]] = (float)sq_dist;
00760             }
00761             count[belongs_to[i]]++;
00762         }
00763 
00764         bool converged = false;
00765         int iteration = 0;
00766         while (!converged && iteration<max_iter) {
00767             converged = true;
00768             iteration++;
00769 
00770             // compute the new cluster centers
00771             for (int i=0;i<branching;++i) {
00772                 memset(dcenters[i],0,sizeof(double)*veclen_);
00773                 radiuses[i] = 0;
00774             }
00775             for (int i=0;i<indices_length;++i) {
00776                 ELEM_TYPE* vec = dataset[indices[i]];
00777                 double* center = dcenters[belongs_to[i]];
00778                 for (size_t k=0;k<veclen_;++k) {
00779                     center[k] += vec[k];
00780                 }
00781             }
00782             for (int i=0;i<branching;++i) {
00783                 int cnt = count[i];
00784                 for (size_t k=0;k<veclen_;++k) {
00785                     dcenters[i][k] /= cnt;
00786                 }
00787             }
00788 
00789             // reassign points to clusters
00790             for (int i=0;i<indices_length;++i) {
00791                 float sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_ ,dcenters[0]);
00792                 int new_centroid = 0;
00793                 for (int j=1;j<branching;++j) {
00794                     float new_sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_,dcenters[j]);
00795                     if (sq_dist>new_sq_dist) {
00796                         new_centroid = j;
00797                         sq_dist = new_sq_dist;
00798                     }
00799                 }
00800                 if (sq_dist>radiuses[new_centroid]) {
00801                     radiuses[new_centroid] = sq_dist;
00802                 }
00803                 if (new_centroid != belongs_to[i]) {
00804                     count[belongs_to[i]]--;
00805                     count[new_centroid]++;
00806                     belongs_to[i] = new_centroid;
00807 
00808                     converged = false;
00809                 }
00810             }
00811 
00812             for (int i=0;i<branching;++i) {
00813                 // if one cluster converges to an empty cluster,
00814                 // move an element into that cluster
00815                 if (count[i]==0) {
00816                     int j = (i+1)%branching;
00817                     while (count[j]<=1) {
00818                         j = (j+1)%branching;
00819                     }
00820 
00821                     for (int k=0;k<indices_length;++k) {
00822                         if (belongs_to[k]==j) {
00823                             belongs_to[k] = i;
00824                             count[j]--;
00825                             count[i]++;
00826                             break;
00827                         }
00828                     }
00829                     converged = false;
00830                 }
00831             }
00832 
00833         }
00834 
00835         DIST_TYPE** centers = new DIST_TYPE*[branching];
00836 
00837         for (int i=0; i<branching; ++i) {
00838             centers[i] = new DIST_TYPE[veclen_];
00839             memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00840             for (size_t k=0; k<veclen_; ++k) {
00841                 centers[i][k] = (DIST_TYPE)dcenters[i][k];
00842             }
00843         }
00844 
00845 
00846         // compute kmeans clustering for each of the resulting clusters
00847         node->childs = pool.allocate<KMeansNode>(branching);
00848         int start = 0;
00849         int end = start;
00850         for (int c=0;c<branching;++c) {
00851             int s = count[c];
00852 
00853             double variance = 0;
00854             double mean_radius =0;
00855             for (int i=0;i<indices_length;++i) {
00856                 if (belongs_to[i]==c) {
00857                     double d = flann_dist(dataset[indices[i]],dataset[indices[i]]+veclen_,zero());
00858                     variance += d;
00859                     mean_radius += sqrt(d);
00860                     std::swap(indices[i],indices[end]);
00861                     std::swap(belongs_to[i],belongs_to[end]);
00862                     end++;
00863                 }
00864             }
00865             variance /= s;
00866             mean_radius /= s;
00867             variance -= flann_dist(centers[c],centers[c]+veclen_,zero());
00868 
00869             node->childs[c] = pool.allocate<KMeansNodeSt>();
00870             node->childs[c]->radius = radiuses[c];
00871             node->childs[c]->pivot = centers[c];
00872             node->childs[c]->variance = (float)variance;
00873             node->childs[c]->mean_radius = (float)mean_radius;
00874             node->childs[c]->indices = NULL;
00875             computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
00876             start=end;
00877         }
00878 
00879         delete[] dcenters.data;
00880         delete[] centers;
00881         delete[] radiuses;
00882         delete[] count;
00883         delete[] belongs_to;
00884     }
00885 
00886 
00887 
00901     void findNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, int& checks, int maxChecks,
00902             Heap<BranchSt>* heap)
00903     {
00904         // Ignore those clusters that are too far away
00905         {
00906             DIST_TYPE bsq = (DIST_TYPE)flann_dist(vec, vec+veclen_, node->pivot);
00907             DIST_TYPE rsq = node->radius;
00908             DIST_TYPE wsq = result.worstDist();
00909 
00910             DIST_TYPE val = bsq-rsq-wsq;
00911             DIST_TYPE val2 = val*val-4*rsq*wsq;
00912 
00913             //if (val>0) {
00914             if (val>0 && val2>0) {
00915                 return;
00916             }
00917         }
00918 
00919         if (node->childs==NULL) {
00920             if (checks>=maxChecks) {
00921                 if (result.full()) return;
00922             }
00923             checks += node->size;
00924             for (int i=0;i<node->size;++i) {
00925                 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00926             }
00927         }
00928         else {
00929             float* domain_distances = new float[branching];
00930             int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
00931             delete[] domain_distances;
00932             findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
00933         }
00934     }
00935 
00944     int exploreNodeBranches(KMeansNode node, const ELEM_TYPE* q, float* domain_distances, Heap<BranchSt>* heap)
00945     {
00946 
00947         int best_index = 0;
00948         domain_distances[best_index] = (float)flann_dist(q,q+veclen_,node->childs[best_index]->pivot);
00949         for (int i=1;i<branching;++i) {
00950             domain_distances[i] = (float)flann_dist(q,q+veclen_,node->childs[i]->pivot);
00951             if (domain_distances[i]<domain_distances[best_index]) {
00952                 best_index = i;
00953             }
00954         }
00955 
00956 //      float* best_center = node->childs[best_index]->pivot;
00957         for (int i=0;i<branching;++i) {
00958             if (i != best_index) {
00959                 domain_distances[i] -= cb_index*node->childs[i]->variance;
00960 
00961 //              float dist_to_border = getDistanceToBorder(node.childs[i].pivot,best_center,q);
00962 //              if (domain_distances[i]<dist_to_border) {
00963 //                  domain_distances[i] = dist_to_border;
00964 //              }
00965                 heap->insert(BranchSt::make_branch(node->childs[i],domain_distances[i]));
00966             }
00967         }
00968 
00969         return best_index;
00970     }
00971 
00972 
00976     void findExactNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec)
00977     {
00978         // Ignore those clusters that are too far away
00979         {
00980             float bsq = (float)flann_dist(vec, vec+veclen_, node->pivot);
00981             float rsq = node->radius;
00982             float wsq = result.worstDist();
00983 
00984             float val = bsq-rsq-wsq;
00985             float val2 = val*val-4*rsq*wsq;
00986 
00987     //          if (val>0) {
00988             if (val>0 && val2>0) {
00989                 return;
00990             }
00991         }
00992 
00993 
00994         if (node->childs==NULL) {
00995             for (int i=0;i<node->size;++i) {
00996                 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00997             }
00998         }
00999         else {
01000             int* sort_indices = new int[branching];
01001 
01002             getCenterOrdering(node, vec, sort_indices);
01003 
01004             for (int i=0; i<branching; ++i) {
01005                 findExactNN(node->childs[sort_indices[i]],result,vec);
01006             }
01007 
01008             delete[] sort_indices;
01009         }
01010     }
01011 
01012 
01018     void getCenterOrdering(KMeansNode node, const ELEM_TYPE* q, int* sort_indices)
01019     {
01020         float* domain_distances = new float[branching];
01021         for (int i=0;i<branching;++i) {
01022             float dist = (float)flann_dist(q, q+veclen_, node->childs[i]->pivot);
01023 
01024             int j=0;
01025             while (domain_distances[j]<dist && j<i) j++;
01026             for (int k=i;k>j;--k) {
01027                 domain_distances[k] = domain_distances[k-1];
01028                 sort_indices[k] = sort_indices[k-1];
01029             }
01030             domain_distances[j] = dist;
01031             sort_indices[j] = i;
01032         }
01033         delete[] domain_distances;
01034     }
01035 
01041     float getDistanceToBorder(float* p, float* c, float* q)
01042     {
01043         float sum = 0;
01044         float sum2 = 0;
01045 
01046         for (int i=0;i<veclen_; ++i) {
01047             float t = c[i]-p[i];
01048             sum += t*(q[i]-(c[i]+p[i])/2);
01049             sum2 += t*t;
01050         }
01051 
01052         return sum*sum/sum2;
01053     }
01054 
01055 
01065     int getMinVarianceClusters(KMeansNode root, KMeansNode* clusters, int clusters_length, float& varianceValue)
01066     {
01067         int clusterCount = 1;
01068         clusters[0] = root;
01069 
01070         float meanVariance = root->variance*root->size;
01071 
01072         while (clusterCount<clusters_length) {
01073             float minVariance = (std::numeric_limits<float>::max)();
01074             int splitIndex = -1;
01075 
01076             for (int i=0;i<clusterCount;++i) {
01077                 if (clusters[i]->childs != NULL) {
01078 
01079                     float variance = meanVariance - clusters[i]->variance*clusters[i]->size;
01080 
01081                     for (int j=0;j<branching;++j) {
01082                         variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
01083                     }
01084                     if (variance<minVariance) {
01085                         minVariance = variance;
01086                         splitIndex = i;
01087                     }
01088                 }
01089             }
01090 
01091             if (splitIndex==-1) break;
01092             if ( (branching+clusterCount-1) > clusters_length) break;
01093 
01094             meanVariance = minVariance;
01095 
01096             // split node
01097             KMeansNode toSplit = clusters[splitIndex];
01098             clusters[splitIndex] = toSplit->childs[0];
01099             for (int i=1;i<branching;++i) {
01100                 clusters[clusterCount++] = toSplit->childs[i];
01101             }
01102         }
01103 
01104         varianceValue = meanVariance/root->size;
01105         return clusterCount;
01106     }
01107 };
01108 
01109 
01110 
01111 //register_index(KMEANS,KMeansTree)
01112 
01113 } // namespace cvflann
01114 
01115 #endif //_OPENCV_KMEANSTREE_H_