Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef OPENCV_FLANN_LSH_TABLE_H_
00036 #define OPENCV_FLANN_LSH_TABLE_H_
00037
00038 #include <algorithm>
00039 #include <iostream>
00040 #include <iomanip>
00041 #include <limits.h>
00042
00043 #ifdef __GXX_EXPERIMENTAL_CXX0X__
00044 # define USE_UNORDERED_MAP 1
00045 #else
00046 # define USE_UNORDERED_MAP 0
00047 #endif
00048 #if USE_UNORDERED_MAP
00049 #include <unordered_map>
00050 #else
00051 #include <map>
00052 #endif
00053 #include <math.h>
00054 #include <stddef.h>
00055
00056 #include "dynamic_bitset.h"
00057 #include "matrix.h"
00058
00059 namespace cvflann
00060 {
00061
00062 namespace lsh
00063 {
00064
00066
/** Type of the feature indices stored inside a bucket (see Bucket). */
00069 typedef uint32_t FeatureIndex;
/** Type of the key that selects a bucket in the table. */
00072 typedef unsigned int BucketKey;
00073
/** A bucket: the list of feature indices that were assigned to one key. */
00076 typedef std::vector<FeatureIndex> Bucket;
00077
00079
/**
 * Summary statistics about the buckets of an LSH table.
 *
 * All scalar members start at zero and both vectors start empty, so a default-constructed
 * object can be read (and printed) safely even if a producer does not fill every field.
 */
struct LshStats
{
    /** Creates an empty statistics record with every counter set to zero. */
    LshStats() :
        n_buckets_(0), bucket_size_mean_(0), bucket_size_median_(0), bucket_size_min_(0), bucket_size_max_(0),
        bucket_size_std_dev(0)
    {
    }

    /** Size of every bucket that was inspected */
    std::vector<unsigned int> bucket_sizes_;
    /** Number of buckets */
    size_t n_buckets_;
    /** Mean bucket size */
    size_t bucket_size_mean_;
    /** Median bucket size */
    size_t bucket_size_median_;
    /** Smallest bucket size */
    size_t bucket_size_min_;
    /** Largest bucket size */
    size_t bucket_size_max_;
    /** Standard deviation of the bucket sizes */
    size_t bucket_size_std_dev;
    /** Histogram of bucket sizes; the stream operator prints each entry as
     * "entry[0]-entry[1]: entry[2]", so every entry must hold at least three values
     */
    std::vector<std::vector<unsigned int> > size_histogram_;
};
00095
00101 inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
00102 {
00103 int w = 20;
00104 out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
00105 << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
00106 << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
00107 << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
00108 << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
00109 << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
00110 << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
00111
00112
00113 out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
00114 << std::setiosflags(std::ios::left);
00115 for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
00116 stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ", ";
00117
00118 return out;
00119 }
00120
00121
00123
00129 template<typename ElementType>
00130 class LshTable
00131 {
00132 public:
00135 #if USE_UNORDERED_MAP
00136 typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
00137 #else
00138 typedef std::map<BucketKey, Bucket> BucketsSpace;
00139 #endif
00140
00143 typedef std::vector<Bucket> BucketsSpeed;
00144
00147 LshTable()
00148 {
00149 }
00150
00156 LshTable(unsigned int , unsigned int )
00157 {
00158 std::cerr << "LSH is not implemented for that type" << std::endl;
00159 assert(0);
00160 }
00161
00166 void add(unsigned int value, const ElementType* feature)
00167 {
00168
00169 BucketKey key = (lsh::BucketKey)getKey(feature);
00170
00171 switch (speed_level_) {
00172 case kArray:
00173
00174 buckets_speed_[key].push_back(value);
00175 break;
00176 case kBitsetHash:
00177
00178 key_bitset_.set(key);
00179 buckets_space_[key].push_back(value);
00180 break;
00181 case kHash:
00182 {
00183
00184 buckets_space_[key].push_back(value);
00185 break;
00186 }
00187 }
00188 }
00189
00193 void add(Matrix<ElementType> dataset)
00194 {
00195 #if USE_UNORDERED_MAP
00196 buckets_space_.rehash((buckets_space_.size() + dataset.rows) * 1.2);
00197 #endif
00198
00199 for (unsigned int i = 0; i < dataset.rows; ++i) add(i, dataset[i]);
00200
00201 optimize();
00202 }
00203
00208 inline const Bucket* getBucketFromKey(BucketKey key) const
00209 {
00210
00211 switch (speed_level_) {
00212 case kArray:
00213
00214 return &buckets_speed_[key];
00215 break;
00216 case kBitsetHash:
00217
00218 if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
00219 else return 0;
00220 break;
00221 case kHash:
00222 {
00223
00224 BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
00225 bucket_it = buckets_space_.find(key);
00226
00227 if (bucket_it == bucket_end) return 0;
00228 else return &bucket_it->second;
00229 break;
00230 }
00231 }
00232 return 0;
00233 }
00234
00237 size_t getKey(const ElementType* ) const
00238 {
00239 std::cerr << "LSH is not implemented for that type" << std::endl;
00240 assert(0);
00241 return 1;
00242 }
00243
00247 LshStats getStats() const;
00248
00249 private:
00255 enum SpeedLevel
00256 {
00257 kArray, kBitsetHash, kHash
00258 };
00259
00262 void initialize(size_t key_size)
00263 {
00264 speed_level_ = kHash;
00265 key_size_ = (unsigned)key_size;
00266 }
00267
00270 void optimize()
00271 {
00272
00273 if (speed_level_ == kArray) return;
00274
00275
00276 if (buckets_space_.size() > (unsigned int)((1 << key_size_) / 2)) {
00277 speed_level_ = kArray;
00278
00279 buckets_speed_.resize(1 << key_size_);
00280 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
00281
00282
00283 buckets_space_.clear();
00284 return;
00285 }
00286
00287
00288
00289 if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
00290 >= size_t(1 << key_size_)) || (key_size_ <= 32)) {
00291 speed_level_ = kBitsetHash;
00292 key_bitset_.resize(1 << key_size_);
00293 key_bitset_.reset();
00294
00295 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
00296 }
00297 else {
00298 speed_level_ = kHash;
00299 key_bitset_.clear();
00300 }
00301 }
00302
00305 BucketsSpeed buckets_speed_;
00306
00309 BucketsSpace buckets_space_;
00310
00312 SpeedLevel speed_level_;
00313
00317 DynamicBitset key_bitset_;
00318
00321 unsigned int key_size_;
00322
00323
00327 std::vector<size_t> mask_;
00328 };
00329
00331
00332
00333 template<>
00334 inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
00335 {
00336 initialize(subsignature_size);
00337
00338 mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
00339
00340
00341 std::vector<size_t> indices(feature_size * CHAR_BIT);
00342 for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = i;
00343 std::random_shuffle(indices.begin(), indices.end());
00344
00345
00346 for (unsigned int i = 0; i < key_size_; ++i) {
00347 size_t index = indices[i];
00348
00349
00350 size_t divisor = CHAR_BIT * sizeof(size_t);
00351 size_t idx = index / divisor;
00352 mask_[idx] |= size_t(1) << (index % divisor);
00353 }
00354
00355
00356 #if 0
00357 {
00358 size_t bcount = 0;
00359 BOOST_FOREACH(size_t mask_block, mask_){
00360 out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
00361 << std::endl;
00362 bcount += __builtin_popcountll(mask_block);
00363 }
00364 out << "bit count : " << std::dec << bcount << std::endl;
00365 out << "mask size : " << mask_.size() << std::endl;
00366 return out;
00367 }
00368 #endif
00369 }
00370
00374 template<>
00375 inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
00376 {
00377
00378
00379 const size_t* feature_block_ptr = reinterpret_cast<const size_t*> (feature);
00380
00381
00382
00383
00384 size_t subsignature = 0;
00385 size_t bit_index = 1;
00386
00387 for (std::vector<size_t>::const_iterator pmask_block = mask_.begin(); pmask_block != mask_.end(); ++pmask_block) {
00388
00389 size_t feature_block = *feature_block_ptr;
00390 size_t mask_block = *pmask_block;
00391 while (mask_block) {
00392
00393 size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);
00394
00395 subsignature += (feature_block & lowest_bit) ? bit_index : 0;
00396
00397 mask_block ^= lowest_bit;
00398
00399 bit_index <<= 1;
00400 }
00401
00402 ++feature_block_ptr;
00403 }
00404 return subsignature;
00405 }
00406
00407 template<>
00408 inline LshStats LshTable<unsigned char>::getStats() const
00409 {
00410 LshStats stats;
00411 stats.bucket_size_mean_ = 0;
00412 if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
00413 stats.n_buckets_ = 0;
00414 stats.bucket_size_median_ = 0;
00415 stats.bucket_size_min_ = 0;
00416 stats.bucket_size_max_ = 0;
00417 return stats;
00418 }
00419
00420 if (!buckets_speed_.empty()) {
00421 for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
00422 stats.bucket_sizes_.push_back((lsh::FeatureIndex)pbucket->size());
00423 stats.bucket_size_mean_ += pbucket->size();
00424 }
00425 stats.bucket_size_mean_ /= buckets_speed_.size();
00426 stats.n_buckets_ = buckets_speed_.size();
00427 }
00428 else {
00429 for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
00430 stats.bucket_sizes_.push_back((lsh::FeatureIndex)x->second.size());
00431 stats.bucket_size_mean_ += x->second.size();
00432 }
00433 stats.bucket_size_mean_ /= buckets_space_.size();
00434 stats.n_buckets_ = buckets_space_.size();
00435 }
00436
00437 std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
00438
00439
00440
00441
00442 stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
00443 stats.bucket_size_min_ = stats.bucket_sizes_.front();
00444 stats.bucket_size_max_ = stats.bucket_sizes_.back();
00445
00446
00447
00448
00449
00450
00451
00452 unsigned int bin_start = 0;
00453 unsigned int bin_end = 20;
00454 bool is_new_bin = true;
00455 for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
00456 != end; )
00457 if (*iterator < bin_end) {
00458 if (is_new_bin) {
00459 stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
00460 stats.size_histogram_.back()[0] = bin_start;
00461 stats.size_histogram_.back()[1] = bin_end - 1;
00462 is_new_bin = false;
00463 }
00464 ++stats.size_histogram_.back()[2];
00465 ++iterator;
00466 }
00467 else {
00468 bin_start += 20;
00469 bin_end += 20;
00470 is_new_bin = true;
00471 }
00472
00473 return stats;
00474 }
00475
00476
00477 }
00478 }
00479
00481
00482 #endif