00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef OPENCV_FLANN_LSH_INDEX_H_
00036 #define OPENCV_FLANN_LSH_INDEX_H_
00037
00038 #include <algorithm>
00039 #include <cassert>
00040 #include <cstring>
00041 #include <map>
00042 #include <vector>
00043
00044 #include "general.h"
00045 #include "nn_index.h"
00046 #include "matrix.h"
00047 #include "result_set.h"
00048 #include "heap.h"
00049 #include "lsh_table.h"
00050 #include "allocator.h"
00051 #include "random.h"
00052 #include "saving.h"
00053
00054 namespace cvflann
00055 {
00056
00057 struct LshIndexParams : public IndexParams
00058 {
00059 LshIndexParams(unsigned int table_number = 12, unsigned int key_size = 20, unsigned int multi_probe_level = 2)
00060 {
00061 (* this)["algorithm"] = FLANN_INDEX_LSH;
00062
00063 (*this)["table_number"] = table_number;
00064
00065 (*this)["key_size"] = key_size;
00066
00067 (*this)["multi_probe_level"] = multi_probe_level;
00068 }
00069 };
00070
00077 template<typename Distance>
00078 class LshIndex : public NNIndex<Distance>
00079 {
00080 public:
00081 typedef typename Distance::ElementType ElementType;
00082 typedef typename Distance::ResultType DistanceType;
00083
00089 LshIndex(const Matrix<ElementType>& input_data, const IndexParams& params = LshIndexParams(),
00090 Distance d = Distance()) :
00091 dataset_(input_data), index_params_(params), distance_(d)
00092 {
00093
00094
00095 table_number_ = (unsigned int)get_param<int>(index_params_,"table_number",12);
00096 key_size_ = (unsigned int)get_param<int>(index_params_,"key_size",20);
00097 multi_probe_level_ = (unsigned int)get_param<int>(index_params_,"multi_probe_level",2);
00098
00099 feature_size_ = (unsigned)dataset_.cols;
00100 fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_);
00101 }
00102
00103
00104 LshIndex(const LshIndex&);
00105 LshIndex& operator=(const LshIndex&);
00106
00110 void buildIndex()
00111 {
00112 tables_.resize(table_number_);
00113 for (unsigned int i = 0; i < table_number_; ++i) {
00114 lsh::LshTable<ElementType>& table = tables_[i];
00115 table = lsh::LshTable<ElementType>(feature_size_, key_size_);
00116
00117
00118 table.add(dataset_);
00119 }
00120 }
00121
00122 flann_algorithm_t getType() const
00123 {
00124 return FLANN_INDEX_LSH;
00125 }
00126
00127
00128 void saveIndex(FILE* stream)
00129 {
00130 save_value(stream,table_number_);
00131 save_value(stream,key_size_);
00132 save_value(stream,multi_probe_level_);
00133 save_value(stream, dataset_);
00134 }
00135
00136 void loadIndex(FILE* stream)
00137 {
00138 load_value(stream, table_number_);
00139 load_value(stream, key_size_);
00140 load_value(stream, multi_probe_level_);
00141 load_value(stream, dataset_);
00142
00143 buildIndex();
00144
00145 index_params_["algorithm"] = getType();
00146 index_params_["table_number"] = table_number_;
00147 index_params_["key_size"] = key_size_;
00148 index_params_["multi_probe_level"] = multi_probe_level_;
00149 }
00150
00154 size_t size() const
00155 {
00156 return dataset_.rows;
00157 }
00158
00162 size_t veclen() const
00163 {
00164 return feature_size_;
00165 }
00166
00171 int usedMemory() const
00172 {
00173 return (int)(dataset_.rows * sizeof(int));
00174 }
00175
00176
00177 IndexParams getParameters() const
00178 {
00179 return index_params_;
00180 }
00181
00190 virtual void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
00191 {
00192 assert(queries.cols == veclen());
00193 assert(indices.rows >= queries.rows);
00194 assert(dists.rows >= queries.rows);
00195 assert(int(indices.cols) >= knn);
00196 assert(int(dists.cols) >= knn);
00197
00198
00199 KNNUniqueResultSet<DistanceType> resultSet(knn);
00200 for (size_t i = 0; i < queries.rows; i++) {
00201 resultSet.clear();
00202 std::fill_n(indices[i], knn, -1);
00203 std::fill_n(dists[i], knn, std::numeric_limits<DistanceType>::max());
00204 findNeighbors(resultSet, queries[i], params);
00205 if (get_param(params,"sorted",true)) resultSet.sortAndCopy(indices[i], dists[i], knn);
00206 else resultSet.copy(indices[i], dists[i], knn);
00207 }
00208 }
00209
00210
00220 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& )
00221 {
00222 getNeighbors(vec, result);
00223 }
00224
00225 private:
00228 typedef std::pair<float, unsigned int> ScoreIndexPair;
00229 struct SortScoreIndexPairOnSecond
00230 {
00231 bool operator()(const ScoreIndexPair& left, const ScoreIndexPair& right) const
00232 {
00233 return left.second < right.second;
00234 }
00235 };
00236
00243 void fill_xor_mask(lsh::BucketKey key, int lowest_index, unsigned int level,
00244 std::vector<lsh::BucketKey>& xor_masks)
00245 {
00246 xor_masks.push_back(key);
00247 if (level == 0) return;
00248 for (int index = lowest_index - 1; index >= 0; --index) {
00249
00250 lsh::BucketKey new_key = key | (1 << index);
00251 fill_xor_mask(new_key, index, level - 1, xor_masks);
00252 }
00253 }
00254
00263 void getNeighbors(const ElementType* vec, bool , float radius, bool do_k, unsigned int k_nn,
00264 float& )
00265 {
00266 static std::vector<ScoreIndexPair> score_index_heap;
00267
00268 if (do_k) {
00269 unsigned int worst_score = std::numeric_limits<unsigned int>::max();
00270 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
00271 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
00272 for (; table != table_end; ++table) {
00273 size_t key = table->getKey(vec);
00274 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
00275 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
00276 for (; xor_mask != xor_mask_end; ++xor_mask) {
00277 size_t sub_key = key ^ (*xor_mask);
00278 const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
00279 if (bucket == 0) continue;
00280
00281
00282 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
00283 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
00284 DistanceType hamming_distance;
00285
00286
00287 for (; training_index < last_training_index; ++training_index) {
00288 hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
00289
00290 if (hamming_distance < worst_score) {
00291
00292 score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
00293 std::push_heap(score_index_heap.begin(), score_index_heap.end());
00294
00295 if (score_index_heap.size() > (unsigned int)k_nn) {
00296
00297 std::pop_heap(score_index_heap.begin(), score_index_heap.end());
00298 score_index_heap.pop_back();
00299
00300 worst_score = score_index_heap.front().first;
00301 }
00302 }
00303 }
00304 }
00305 }
00306 }
00307 else {
00308 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
00309 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
00310 for (; table != table_end; ++table) {
00311 size_t key = table->getKey(vec);
00312 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
00313 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
00314 for (; xor_mask != xor_mask_end; ++xor_mask) {
00315 size_t sub_key = key ^ (*xor_mask);
00316 const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
00317 if (bucket == 0) continue;
00318
00319
00320 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
00321 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
00322 DistanceType hamming_distance;
00323
00324
00325 for (; training_index < last_training_index; ++training_index) {
00326
00327 hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
00328 if (hamming_distance < radius) score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
00329 }
00330 }
00331 }
00332 }
00333 }
00334
00339 void getNeighbors(const ElementType* vec, ResultSet<DistanceType>& result)
00340 {
00341 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
00342 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
00343 for (; table != table_end; ++table) {
00344 size_t key = table->getKey(vec);
00345 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
00346 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
00347 for (; xor_mask != xor_mask_end; ++xor_mask) {
00348 size_t sub_key = key ^ (*xor_mask);
00349 const lsh::Bucket* bucket = table->getBucketFromKey((lsh::BucketKey)sub_key);
00350 if (bucket == 0) continue;
00351
00352
00353 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
00354 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
00355 DistanceType hamming_distance;
00356
00357
00358 for (; training_index < last_training_index; ++training_index) {
00359
00360 hamming_distance = distance_(vec, dataset_[*training_index], (int)dataset_.cols);
00361 result.addPoint(hamming_distance, *training_index);
00362 }
00363 }
00364 }
00365 }
00366
00368 std::vector<lsh::LshTable<ElementType> > tables_;
00369
00371 Matrix<ElementType> dataset_;
00372
00374 unsigned int feature_size_;
00375
00376 IndexParams index_params_;
00377
00379 unsigned int table_number_;
00381 unsigned int key_size_;
00383 unsigned int multi_probe_level_;
00384
00386 std::vector<lsh::BucketKey> xor_masks_;
00387
00388 Distance distance_;
00389 };
00390 }
00391
00392 #endif //OPENCV_FLANN_LSH_INDEX_H_