lsh_index.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 /***********************************************************************
32  * Author: Vincent Rabaud
33  *************************************************************************/
34 
35 #ifndef OPENCV_FLANN_LSH_INDEX_H_
36 #define OPENCV_FLANN_LSH_INDEX_H_
37 
38 #include <algorithm>
39 #include <cassert>
40 #include <cstring>
41 #include <map>
42 #include <vector>
43 
44 #include "general.h"
45 #include "nn_index.h"
46 #include "matrix.h"
47 #include "result_set.h"
48 #include "heap.h"
49 #include "lsh_table.h"
50 #include "allocator.h"
51 #include "random.h"
52 #include "saving.h"
53 
54 namespace cvflann
55 {
56 
57 struct LshIndexParams : public IndexParams
58 {
59  LshIndexParams(unsigned int table_number = 12, unsigned int key_size = 20, unsigned int multi_probe_level = 2)
60  {
61  (* this)["algorithm"] = FLANN_INDEX_LSH;
62  // The number of hash tables to use
63  (*this)["table_number"] = table_number;
64  // The length of the key in the hash tables
65  (*this)["key_size"] = key_size;
66  // Number of levels to use in multi-probe (0 for standard LSH)
67  (*this)["multi_probe_level"] = multi_probe_level;
68  }
69 };
70 
77 template<typename Distance>
78 class LshIndex : public NNIndex<Distance>
79 {
80 public:
81  typedef typename Distance::ElementType ElementType;
82  typedef typename Distance::ResultType DistanceType;
83 
90  Distance d = Distance()) :
91  dataset_(input_data), index_params_(params), distance_(d)
92  {
93  // cv::flann::IndexParams sets integer params as 'int', so it is used with get_param
94  // in place of 'unsigned int'
95  table_number_ = (unsigned int)get_param<int>(index_params_,"table_number",12);
96  key_size_ = (unsigned int)get_param<int>(index_params_,"key_size",20);
97  multi_probe_level_ = (unsigned int)get_param<int>(index_params_,"multi_probe_level",2);
98 
99  feature_size_ = (unsigned)dataset_.cols;
100  fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_);
101  }
102 
103 
104  LshIndex(const LshIndex&);
105  LshIndex& operator=(const LshIndex&);
106 
110  void buildIndex()
111  {
112  tables_.resize(table_number_);
113  for (unsigned int i = 0; i < table_number_; ++i) {
114  lsh::LshTable<ElementType>& table = tables_[i];
115  table = lsh::LshTable<ElementType>(feature_size_, key_size_);
116 
117  // Add the features to the table
118  table.add(dataset_);
119  }
120  }
121 
123  {
124  return FLANN_INDEX_LSH;
125  }
126 
127 
128  void saveIndex(FILE* stream)
129  {
130  save_value(stream,table_number_);
131  save_value(stream,key_size_);
132  save_value(stream,multi_probe_level_);
133  save_value(stream, dataset_);
134  }
135 
136  void loadIndex(FILE* stream)
137  {
138  load_value(stream, table_number_);
139  load_value(stream, key_size_);
140  load_value(stream, multi_probe_level_);
141  load_value(stream, dataset_);
142  // Building the index is so fast we can afford not storing it
143  buildIndex();
144 
145  index_params_["algorithm"] = getType();
146  index_params_["table_number"] = table_number_;
147  index_params_["key_size"] = key_size_;
148  index_params_["multi_probe_level"] = multi_probe_level_;
149  }
150 
154  size_t size() const
155  {
156  return dataset_.rows;
157  }
158 
162  size_t veclen() const
163  {
164  return feature_size_;
165  }
166 
171  int usedMemory() const
172  {
173  return (int)(dataset_.rows * sizeof(int));
174  }
175 
176 
178  {
179  return index_params_;
180  }
181 
190  virtual void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
191  {
192  assert(queries.cols == veclen());
193  assert(indices.rows >= queries.rows);
194  assert(dists.rows >= queries.rows);
195  assert(int(indices.cols) >= knn);
196  assert(int(dists.cols) >= knn);
197 
198 
199  KNNUniqueResultSet<DistanceType> resultSet(knn);
200  for (size_t i = 0; i < queries.rows; i++) {
201  resultSet.clear();
202  std::fill_n(indices[i], knn, -1);
203  std::fill_n(dists[i], knn, std::numeric_limits<DistanceType>::max());
204  findNeighbors(resultSet, queries[i], params);
205  if (get_param(params,"sorted",true)) resultSet.sortAndCopy(indices[i], dists[i], knn);
206  else resultSet.copy(indices[i], dists[i], knn);
207  }
208  }
209 
210 
220  void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& /*searchParams*/)
221  {
222  getNeighbors(vec, result);
223  }
224 
225 private:
/** A (score, feature index) pair. */
typedef std::pair<float, unsigned int> ScoreIndexPair;

/** Strict ordering of ScoreIndexPair values by index (.second) only; the score is ignored. */
struct SortScoreIndexPairOnSecond
{
    bool operator()(const ScoreIndexPair& left, const ScoreIndexPair& right) const
    {
        const unsigned int left_index = left.second;
        const unsigned int right_index = right.second;
        return !(left_index >= right_index);
    }
};
236 
243  void fill_xor_mask(lsh::BucketKey key, int lowest_index, unsigned int level,
244  std::vector<lsh::BucketKey>& xor_masks)
245  {
246  xor_masks.push_back(key);
247  if (level == 0) return;
248  for (int index = lowest_index - 1; index >= 0; --index) {
249  // Create a new key
250  lsh::BucketKey new_key = key | (1 << index);
251  fill_xor_mask(new_key, index, level - 1, xor_masks);
252  }
253  }
254 
263  void getNeighbors(const ElementType* vec, bool /*do_radius*/, float radius, bool do_k, unsigned int k_nn,
264  float& /*checked_average*/)
265  {
266  static std::vector<ScoreIndexPair> score_index_heap;
267 
268  if (do_k) {
269  unsigned int worst_score = std::numeric_limits<unsigned int>::max();
270  typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
271  typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
272  for (; table != table_end; ++table) {
273  size_t key = table->getKey(vec);
274  std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
275  std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
276  for (; xor_mask != xor_mask_end; ++xor_mask) {
277  size_t sub_key = key ^ (*xor_mask);
278  const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
279  if (bucket == 0) continue;
280 
281  // Go over each descriptor index
282  std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
283  std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
284  DistanceType hamming_distance;
285 
286  // Process the rest of the candidates
287  for (; training_index < last_training_index; ++training_index) {
288  hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
289 
290  if (hamming_distance < worst_score) {
291  // Insert the new element
292  score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
293  std::push_heap(score_index_heap.begin(), score_index_heap.end());
294 
295  if (score_index_heap.size() > (unsigned int)k_nn) {
296  // Remove the highest distance value as we have too many elements
297  std::pop_heap(score_index_heap.begin(), score_index_heap.end());
298  score_index_heap.pop_back();
299  // Keep track of the worst score
300  worst_score = score_index_heap.front().first;
301  }
302  }
303  }
304  }
305  }
306  }
307  else {
308  typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
309  typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
310  for (; table != table_end; ++table) {
311  size_t key = table->getKey(vec);
312  std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
313  std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
314  for (; xor_mask != xor_mask_end; ++xor_mask) {
315  size_t sub_key = key ^ (*xor_mask);
316  const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
317  if (bucket == 0) continue;
318 
319  // Go over each descriptor index
320  std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
321  std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
322  DistanceType hamming_distance;
323 
324  // Process the rest of the candidates
325  for (; training_index < last_training_index; ++training_index) {
326  // Compute the Hamming distance
327  hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
328  if (hamming_distance < radius) score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
329  }
330  }
331  }
332  }
333  }
334 
339  void getNeighbors(const ElementType* vec, ResultSet<DistanceType>& result)
340  {
341  typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
342  typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
343  for (; table != table_end; ++table) {
344  size_t key = table->getKey(vec);
345  std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
346  std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
347  for (; xor_mask != xor_mask_end; ++xor_mask) {
348  size_t sub_key = key ^ (*xor_mask);
349  const lsh::Bucket* bucket = table->getBucketFromKey((lsh::BucketKey)sub_key);
350  if (bucket == 0) continue;
351 
352  // Go over each descriptor index
353  std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
354  std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
355  DistanceType hamming_distance;
356 
357  // Process the rest of the candidates
358  for (; training_index < last_training_index; ++training_index) {
359  // Compute the Hamming distance
360  hamming_distance = distance_(vec, dataset_[*training_index], (int)dataset_.cols);
361  result.addPoint(hamming_distance, *training_index);
362  }
363  }
364  }
365  }
366 
368  std::vector<lsh::LshTable<ElementType> > tables_;
369 
371  Matrix<ElementType> dataset_;
372 
374  unsigned int feature_size_;
375 
376  IndexParams index_params_;
377 
379  unsigned int table_number_;
381  unsigned int key_size_;
383  unsigned int multi_probe_level_;
384 
386  std::vector<lsh::BucketKey> xor_masks_;
387 
388  Distance distance_;
389 };
390 }
391 
392 #endif //OPENCV_FLANN_LSH_INDEX_H_
void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &)
Definition: lsh_index.h:220
flann_algorithm_t
Definition: defines.h:81
T get_param(const IndexParams &params, std::string name, const T &default_value)
Definition: params.h:59
GLint level
Definition: tracking.hpp:88
LshIndex(const Matrix< ElementType > &input_data, const IndexParams &params=LshIndexParams(), Distance d=Distance())
Definition: lsh_index.h:89
size_t cols
Definition: matrix.h:52
Definition: lsh_index.h:78
int usedMemory() const
Definition: lsh_index.h:171
CvFileNode const CvStringHashNode * key
Definition: core_c.h:1584
size_t veclen() const
Definition: lsh_index.h:162
GLuint index
Definition: core_c.h:986
int d
Definition: legacy.hpp:3064
IndexParams getParameters() const
Definition: lsh_index.h:177
flann_algorithm_t getType() const
Definition: lsh_index.h:122
Distance::ElementType ElementType
Definition: lsh_index.h:81
Definition: lsh_table.h:130
virtual void knnSearch(const Matrix< ElementType > &queries, Matrix< int > &indices, Matrix< DistanceType > &dists, int knn, const SearchParams &params)
Perform k-nearest neighbor search.
Definition: lsh_index.h:190
const CvArr const CvArr CvArr * result
Definition: core_c.h:805
unsigned int BucketKey
Definition: lsh_table.h:72
GLenum GLsizei GLenum GLenum const GLvoid * table
Definition: params.h:44
size_t size() const
Definition: lsh_index.h:154
LshIndex & operator=(const LshIndex &)
virtual void sortAndCopy(int *indices, DistanceType *dist, int n_neighbors=-1) const
Definition: result_set.h:351
GLuint GLuint GLsizei GLenum const GLvoid * indices
Definition: legacy.hpp:3084
Distance::ResultType DistanceType
Definition: lsh_index.h:82
std::vector< FeatureIndex > Bucket
Definition: lsh_table.h:76
int index
Definition: core_c.h:309
LshIndexParams(unsigned int table_number=12, unsigned int key_size=20, unsigned int multi_probe_level=2)
Definition: lsh_index.h:59
GLdouble left
void load_value(FILE *stream, T &value, size_t count=1)
Definition: saving.h:147
void clear()
Definition: result_set.h:425
Definition: result_set.h:389
GLdouble GLdouble right
void buildIndex()
Definition: lsh_index.h:110
virtual void copy(int *indices, DistanceType *dist, int n_neighbors=-1) const
Definition: result_set.h:327
Definition: result_set.h:66
std::map< std::string, any > IndexParams
Definition: params.h:42
GLenum const GLfloat * params
Definition: compat.hpp:688
Definition: defines.h:89
void add(unsigned int value, const ElementType *feature)
Definition: lsh_table.h:166
Definition: nn_index.h:48
size_t rows
Definition: matrix.h:51
::max::max int
Definition: functional.hpp:324
void saveIndex(FILE *stream)
Saves the index to a stream.
Definition: lsh_index.h:128
void save_value(FILE *stream, const T &value, size_t count=1)
Definition: saving.h:126
CvPoint int radius
Definition: core_c.h:1290
Definition: lsh_index.h:57
void loadIndex(FILE *stream)
Loads the index from a stream.
Definition: lsh_index.h:136