31 #ifndef OPENCV_FLANN_KMEANS_INDEX_H_
32 #define OPENCV_FLANN_KMEANS_INDEX_H_
63 (*this)[
"branching"] = branching;
67 (*this)[
"centers_init"] = centers_init;
69 (*this)[
"cb_index"] = cb_index;
80 template <
typename Distance>
113 for (index=0; index<
k; ++
index) {
114 bool duplicate =
true;
120 centers_length =
index;
124 centers[
index] = indices[rnd];
126 for (
int j=0; j<
index; ++j) {
127 DistanceType sq = distance_(dataset_[centers[index]], dataset_[centers[j]], dataset_.
cols);
135 centers_length =
index;
151 int n = indices_length;
154 assert(rnd >=0 && rnd < n);
156 centers[0] = indices[rnd];
159 for (index=1; index<
k; ++
index) {
163 for (
int j=0; j<
n; ++j) {
165 for (
int i=1; i<
index; ++i) {
166 DistanceType tmp_dist = distance_(dataset_[centers[i]],dataset_[indices[j]],dataset_.
cols);
176 if (best_index!=-1) {
177 centers[
index] = indices[best_index];
183 centers_length =
index;
202 int n = indices_length;
204 double currentPot = 0;
209 assert(index >=0 && index < n);
210 centers[0] = indices[
index];
212 for (
int i = 0; i <
n; i++) {
213 closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.
cols);
214 currentPot += closestDistSq[i];
218 const int numLocalTries = 1;
222 for (centerCount = 1; centerCount <
k; centerCount++) {
225 double bestNewPot = -1;
226 int bestNewIndex = -1;
227 for (
int localTrial = 0; localTrial < numLocalTries; localTrial++) {
232 for (index = 0; index < n-1; index++) {
233 if (randVal <= closestDistSq[index])
break;
234 else randVal -= closestDistSq[
index];
239 for (
int i = 0; i <
n; i++) newPot +=
std::min( distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.
cols), closestDistSq[i] );
242 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
244 bestNewIndex =
index;
249 centers[centerCount] = indices[bestNewIndex];
250 currentPot = bestNewPot;
251 for (
int i = 0; i <
n; i++) closestDistSq[i] =
std::min( distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.
cols), closestDistSq[i] );
254 centers_length = centerCount;
256 delete[] closestDistSq;
276 Distance
d = Distance())
277 : dataset_(inputData), index_params_(
params), root_(NULL), indices_(NULL), distance_(
d)
281 size_ = dataset_.
rows;
282 veclen_ = dataset_.
cols;
287 iterations_ = (std::numeric_limits<int>::max)();
301 throw FLANNException(
"Unknown algorithm for choosing initial centers.");
322 if (indices_!=NULL) {
367 indices_ =
new int[size_];
368 for (
size_t i=0; i<size_; ++i) {
369 indices_[i] =
int(i);
372 root_ = pool_.
allocate<KMeansNode>();
373 computeNodeStatistics(root_, indices_, (
int)size_);
374 computeClustering(root_, indices_, (
int)size_, branching_,0);
386 save_tree(stream, root_);
396 if (indices_!=NULL) {
399 indices_ =
new int[size_];
405 load_tree(stream, root_);
407 index_params_[
"algorithm"] =
getType();
408 index_params_[
"branching"] = branching_;
409 index_params_[
"iterations"] = iterations_;
410 index_params_[
"centers_init"] = centers_init_;
411 index_params_[
"cb_index"] = cb_index_;
428 int maxChecks =
get_param(searchParams,
"checks",32);
431 findExactNN(root_, result, vec);
438 findNN(root_, result, vec, checks, maxChecks, heap);
441 while (heap->
popMin(branch) && (checks<maxChecks || !result.
full())) {
443 findNN(node, result, vec, checks, maxChecks, heap);
445 assert(result.
full());
461 int numClusters = centers.
rows;
467 KMeansNodePtr* clusters =
new KMeansNodePtr[numClusters];
469 int clusterCount = getMinVarianceClusters(root_, clusters, numClusters, variance);
471 Logger::info(
"Clusters requested: %d, returning %d\n",numClusters, clusterCount);
473 for (
int i=0; i<clusterCount; ++i) {
475 for (
size_t j=0; j<veclen_; ++j) {
476 centers[i][j] = center[j];
486 return index_params_;
529 typedef KMeansNode* KMeansNodePtr;
534 typedef BranchStruct<KMeansNodePtr, DistanceType> BranchSt;
539 void save_tree(FILE* stream, KMeansNodePtr
node)
542 save_value(stream, *(node->pivot), (
int)veclen_);
543 if (node->childs==NULL) {
544 int indices_offset = (
int)(node->indices - indices_);
548 for(
int i=0; i<branching_; ++i) {
549 save_tree(stream, node->childs[i]);
555 void load_tree(FILE* stream, KMeansNodePtr& node)
557 node = pool_.
allocate<KMeansNode>();
560 load_value(stream, *(node->pivot), (
int)veclen_);
561 if (node->childs==NULL) {
564 node->indices = indices_ + indices_offset;
567 node->childs = pool_.
allocate<KMeansNodePtr>(branching_);
568 for(
int i=0; i<branching_; ++i) {
569 load_tree(stream, node->childs[i]);
578 void free_centers(KMeansNodePtr node)
580 delete[] node->pivot;
581 if (node->childs!=NULL) {
582 for (
int k=0;
k<branching_; ++
k) {
583 free_centers(node->childs[
k]);
595 void computeNodeStatistics(KMeansNodePtr node,
int*
indices,
int indices_length)
605 for (
size_t i=0; i<size_; ++i) {
607 for (
size_t j=0; j<veclen_; ++j) {
610 variance += distance_(vec, ZeroIterator<ElementType>(), veclen_);
612 for (
size_t j=0; j<veclen_; ++j) {
616 variance -= distance_(mean, ZeroIterator<ElementType>(), veclen_);
619 for (
int i=0; i<indices_length; ++i) {
620 tmp = distance_(mean, dataset_[indices[i]], veclen_);
626 node->variance = variance;
643 void computeClustering(KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
int level)
645 node->size = indices_length;
648 if (indices_length < branching) {
650 std::sort(node->indices,node->indices+indices_length);
655 int* centers_idx =
new int[branching];
657 (this->*
chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
659 if (centers_length<branching) {
661 std::sort(node->indices,node->indices+indices_length);
663 delete [] centers_idx;
668 Matrix<double> dcenters(
new double[branching*veclen_],branching,veclen_);
669 for (
int i=0; i<centers_length; ++i) {
671 for (
size_t k=0;
k<veclen_; ++
k) {
675 delete[] centers_idx;
677 std::vector<DistanceType> radiuses(branching);
678 int*
count =
new int[branching];
679 for (
int i=0; i<branching; ++i) {
685 int* belongs_to =
new int[indices_length];
686 for (
int i=0; i<indices_length; ++i) {
688 DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
690 for (
int j=1; j<branching; ++j) {
691 DistanceType new_sq_dist = distance_(dataset_[indices[i]], dcenters[j], veclen_);
692 if (sq_dist>new_sq_dist) {
694 sq_dist = new_sq_dist;
697 if (sq_dist>radiuses[belongs_to[i]]) {
698 radiuses[belongs_to[i]] = sq_dist;
700 count[belongs_to[i]]++;
703 bool converged =
false;
705 while (!converged && iteration<iterations_) {
710 for (
int i=0; i<branching; ++i) {
711 memset(dcenters[i],0,
sizeof(
double)*veclen_);
714 for (
int i=0; i<indices_length; ++i) {
716 double*
center = dcenters[belongs_to[i]];
717 for (
size_t k=0;
k<veclen_; ++
k) {
721 for (
int i=0; i<branching; ++i) {
723 for (
size_t k=0;
k<veclen_; ++
k) {
724 dcenters[i][
k] /= cnt;
729 for (
int i=0; i<indices_length; ++i) {
730 DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
731 int new_centroid = 0;
732 for (
int j=1; j<branching; ++j) {
733 DistanceType new_sq_dist = distance_(dataset_[indices[i]], dcenters[j], veclen_);
734 if (sq_dist>new_sq_dist) {
736 sq_dist = new_sq_dist;
739 if (sq_dist>radiuses[new_centroid]) {
740 radiuses[new_centroid] = sq_dist;
742 if (new_centroid != belongs_to[i]) {
743 count[belongs_to[i]]--;
744 count[new_centroid]++;
745 belongs_to[i] = new_centroid;
751 for (
int i=0; i<branching; ++i) {
755 int j = (i+1)%branching;
756 while (count[j]<=1) {
760 for (
int k=0;
k<indices_length; ++
k) {
761 if (belongs_to[
k]==j) {
763 if ( distance_(dataset_[indices[
k]], dcenters[j], veclen_) == radiuses[j] ) {
779 for (
int i=0; i<branching; ++i) {
782 for (
size_t k=0;
k<veclen_; ++
k) {
789 node->childs = pool_.
allocate<KMeansNodePtr>(branching);
792 for (
int c=0;
c<branching; ++
c) {
797 for (
int i=0; i<indices_length; ++i) {
798 if (belongs_to[i]==
c) {
799 DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
801 mean_radius +=
sqrt(d);
803 std::swap(belongs_to[i],belongs_to[end]);
809 variance -= distance_(centers[
c], ZeroIterator<ElementType>(), veclen_);
811 node->childs[
c] = pool_.
allocate<KMeansNode>();
812 node->childs[
c]->radius = radiuses[
c];
813 node->childs[
c]->pivot = centers[
c];
814 node->childs[
c]->variance = variance;
815 node->childs[
c]->mean_radius = mean_radius;
816 node->childs[
c]->indices = NULL;
817 computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
821 delete[] dcenters.data;
842 void findNN(KMeansNodePtr node, ResultSet<DistanceType>&
result,
const ElementType* vec,
int& checks,
int maxChecks,
843 Heap<BranchSt>* heap)
847 DistanceType bsq = distance_(vec, node->pivot, veclen_);
855 if ((val>0)&&(val2>0)) {
860 if (node->childs==NULL) {
861 if (checks>=maxChecks) {
862 if (result.full())
return;
864 checks += node->size;
865 for (
int i=0; i<node->size; ++i) {
866 int index = node->indices[i];
868 result.addPoint(dist, index);
873 int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
874 delete[] domain_distances;
875 findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
887 int exploreNodeBranches(KMeansNodePtr node,
const ElementType*
q,
DistanceType* domain_distances, Heap<BranchSt>* heap)
891 domain_distances[best_index] = distance_(q, node->childs[best_index]->pivot, veclen_);
892 for (
int i=1; i<branching_; ++i) {
893 domain_distances[i] = distance_(q, node->childs[i]->pivot, veclen_);
894 if (domain_distances[i]<domain_distances[best_index]) {
900 for (
int i=0; i<branching_; ++i) {
901 if (i != best_index) {
902 domain_distances[i] -= cb_index_*node->childs[i]->variance;
908 heap->insert(BranchSt(node->childs[i],domain_distances[i]));
919 void findExactNN(KMeansNodePtr node, ResultSet<DistanceType>& result,
const ElementType* vec)
923 DistanceType bsq = distance_(vec, node->pivot, veclen_);
931 if ((val>0)&&(val2>0)) {
937 if (node->childs==NULL) {
938 for (
int i=0; i<node->size; ++i) {
939 int index = node->indices[i];
941 result.addPoint(dist, index);
945 int* sort_indices =
new int[branching_];
947 getCenterOrdering(node, vec, sort_indices);
949 for (
int i=0; i<branching_; ++i) {
950 findExactNN(node->childs[sort_indices[i]],result,vec);
953 delete[] sort_indices;
963 void getCenterOrdering(KMeansNodePtr node,
const ElementType* q,
int* sort_indices)
966 for (
int i=0; i<branching_; ++i) {
970 while (domain_distances[j]<dist && j<i) j++;
971 for (
int k=i;
k>j; --
k) {
972 domain_distances[
k] = domain_distances[
k-1];
973 sort_indices[
k] = sort_indices[
k-1];
975 domain_distances[j] =
dist;
978 delete[] domain_distances;
991 for (
int i=0; i<veclen_; ++i) {
993 sum += t*(q[i]-(c[i]+p[i])/2);
1010 int getMinVarianceClusters(KMeansNodePtr root, KMeansNodePtr* clusters,
int clusters_length,
DistanceType& varianceValue)
1012 int clusterCount = 1;
1017 while (clusterCount<clusters_length) {
1018 DistanceType minVariance = (std::numeric_limits<DistanceType>::max)();
1019 int splitIndex = -1;
1021 for (
int i=0; i<clusterCount; ++i) {
1022 if (clusters[i]->childs != NULL) {
1024 DistanceType variance = meanVariance - clusters[i]->variance*clusters[i]->size;
1026 for (
int j=0; j<branching_; ++j) {
1027 variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
1029 if (variance<minVariance) {
1030 minVariance = variance;
1036 if (splitIndex==-1)
break;
1037 if ( (branching_+clusterCount-1) > clusters_length)
break;
1039 meanVariance = minVariance;
1042 KMeansNodePtr toSplit = clusters[splitIndex];
1043 clusters[splitIndex] = toSplit->childs[0];
1044 for (
int i=1; i<branching_; ++i) {
1045 clusters[clusterCount++] = toSplit->childs[i];
1049 varianceValue = meanVariance/root->size;
1050 return clusterCount;
1074 const Matrix<ElementType> dataset_;
1092 KMeansNodePtr root_;
1107 PooledAllocator pool_;
1117 #endif //OPENCV_FLANN_KMEANS_INDEX_H_
void buildIndex()
Definition: kmeans_index.h:361
flann_algorithm_t
Definition: defines.h:81
virtual ~KMeansIndex()
Definition: kmeans_index.h:317
CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst)
computes square root of each matrix element (dst = src**0.5)
CvFileNode * node
Definition: core_c.h:1638
T get_param(const IndexParams ¶ms, std::string name, const T &default_value)
Definition: params.h:59
int usedMemory
Definition: allocator.h:89
GLint level
Definition: tracking.hpp:88
size_t cols
Definition: matrix.h:52
CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags)
sorts independently each matrix row or each matrix column
Definition: defines.h:108
Definition: kmeans_index.h:81
CvSize CvPoint2D32f int count
Definition: calib3d.hpp:221
int wastedMemory
Definition: allocator.h:90
double rand_double(double high=1.0, double low=0)
Definition: random.h:61
virtual bool full() const =0
Definition: kmeans_index.h:56
T * allocate(size_t count=1)
Definition: allocator.h:178
void chooseCentersRandom(int k, int *indices, int indices_length, int *centers, int ¢ers_length)
Definition: kmeans_index.h:108
Distance::ResultType DistanceType
Definition: kmeans_index.h:85
GLuint index
Definition: core_c.h:986
void saveIndex(FILE *stream)
Saves the index to a stream.
Definition: kmeans_index.h:378
KMeansIndexParams(int branching=32, int iterations=11, flann_centers_init_t centers_init=FLANN_CENTERS_RANDOM, float cb_index=0.2)
Definition: kmeans_index.h:58
size_t size() const
Definition: kmeans_index.h:330
int d
Definition: legacy.hpp:3064
void chooseCentersKMeanspp(int k, int *indices, int indices_length, int *centers, int ¢ers_length)
Definition: kmeans_index.h:200
CvRect r
Definition: core_c.h:1282
CvPoint center
Definition: core_c.h:1290
int usedMemory() const
Definition: kmeans_index.h:353
OutputArray sum
Definition: imgproc.hpp:620
IndexParams getParameters() const
Definition: kmeans_index.h:484
T node
Definition: result_set.h:52
const CvArr const CvArr CvArr * result
Definition: core_c.h:805
typedef void(CV_CDECL *CvMouseCallback)(int event
KMeansIndex & operator=(const KMeansIndex &)
double start
Definition: core_c.h:774
void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams)
Definition: kmeans_index.h:425
IplImage CvMemStorage CvSeq int level
Definition: legacy.hpp:2917
GLuint GLuint GLsizei GLenum const GLvoid * indices
Definition: legacy.hpp:3084
int getClusterCenters(Matrix< DistanceType > ¢ers)
Definition: kmeans_index.h:459
void set_cb_index(float index)
Definition: kmeans_index.h:344
GLdouble GLdouble GLdouble GLdouble q
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
computes per-element minimum of two arrays (dst = min(src1, src2))
int index
Definition: core_c.h:309
const CvMat CvMat CvMat int k
Definition: legacy.hpp:3052
flann_algorithm_t getType() const
Definition: kmeans_index.h:263
GLuint GLuint GLsizei count
Definition: core_c.h:973
CvPoint2D32f float float float c
Definition: legacy.hpp:578
void load_value(FILE *stream, T &value, size_t count=1)
Definition: saving.h:147
const CvArr CvArr double int int int iterations
Definition: tracking.hpp:102
flann_centers_init_t
Definition: defines.h:105
void chooseCentersGonzales(int k, int *indices, int indices_length, int *centers, int ¢ers_length)
Definition: kmeans_index.h:149
const CvMat CvMat * indices
Definition: legacy.hpp:3052
Definition: result_set.h:66
size_t veclen() const
Definition: kmeans_index.h:338
CvArr * mean
Definition: core_c.h:802
std::map< std::string, any > IndexParams
Definition: params.h:42
int next()
Definition: random.h:120
GLenum const GLfloat * params
Definition: compat.hpp:688
const GLubyte * c
Definition: legacy.hpp:633
Definition: defines.h:170
void(KMeansIndex::* centersAlgFunction)(int, int *, int, int *, int &)
Definition: kmeans_index.h:89
void loadIndex(FILE *stream)
Loads the index from a stream.
Definition: kmeans_index.h:390
Definition: nn_index.h:48
int n
Definition: legacy.hpp:3070
size_t rows
Definition: matrix.h:51
::max::max int
Definition: functional.hpp:324
centersAlgFunction chooseCenters
Definition: kmeans_index.h:94
KMeansIndex(const Matrix< ElementType > &inputData, const IndexParams ¶ms=KMeansIndexParams(), Distance d=Distance())
Definition: kmeans_index.h:275
double double end
Definition: core_c.h:774
CV_EXPORTS void swap(Mat &a, Mat &b)
swaps two matrices
Definition: defines.h:107
void save_value(FILE *stream, const T &value, size_t count=1)
Definition: saving.h:126
CvPoint int radius
Definition: core_c.h:1290
Distance::ElementType ElementType
Definition: kmeans_index.h:84
bool popMin(T &value)
Definition: heap.h:147
int rand_int(int high=RAND_MAX, int low=0)
Definition: random.h:72
Definition: defines.h:109
CvPoint3D64f double * dist
Definition: legacy.hpp:556
Definition: result_set.h:50
short float uchar uchar uchar uchar uchar ushort int uchar ushort int float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float float int int int float int int int float int CV_CUDEV_IMPLEMENT_VEC_BINARY_OP char CV_CUDEV_IMPLEMENT_VEC_BINARY_OP ushort CV_CUDEV_IMPLEMENT_VEC_BINARY_OP short CV_CUDEV_IMPLEMENT_VEC_BINARY_OP int CV_CUDEV_IMPLEMENT_VEC_BINARY_OP uint CV_CUDEV_IMPLEMENT_VEC_BINARY_OP float CV_CUDEV_IMPLEMENT_VEC_BINARY_OP double int int uint double
Definition: vec_math.hpp:432