30 #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
31 #define OPENCV_FLANN_AUTOTUNED_INDEX_H_
48 template<
typename Distance>
54 AutotunedIndexParams(
float target_precision = 0.8,
float build_weight = 0.01,
float memory_weight = 0,
float sample_fraction = 0.1)
58 (*this)[
"target_precision"] = target_precision;
60 (*this)[
"build_weight"] = build_weight;
62 (*this)[
"memory_weight"] = memory_weight;
64 (*this)[
"sample_fraction"] = sample_fraction;
69 template <
typename Distance>
77 dataset_(inputData), distance_(
d)
91 if (bestIndex_ != NULL) {
102 bestParams_ = estimateBuildParams();
103 Logger::info(
"----------------------------------------------------\n");
104 Logger::info(
"Autotuned parameters:\n");
106 Logger::info(
"----------------------------------------------------\n");
109 bestIndex_->buildIndex();
110 speedup_ = estimateSearchParams(bestSearchParams_);
111 Logger::info(
"----------------------------------------------------\n");
112 Logger::info(
"Search parameters:\n");
114 Logger::info(
"----------------------------------------------------\n");
122 save_value(stream, (
int)bestIndex_->getType());
123 bestIndex_->saveIndex(stream);
124 save_value(stream, get_param<int>(bestSearchParams_,
"checks"));
137 bestIndex_ = create_index_by_type<Distance>(dataset_,
params, distance_);
138 bestIndex_->loadIndex(stream);
141 bestSearchParams_[
"checks"] = checks;
151 bestIndex_->findNeighbors(result, vec, bestSearchParams_);
154 bestIndex_->findNeighbors(result, vec, searchParams);
161 return bestIndex_->getParameters();
166 return bestSearchParams_;
180 return bestIndex_->size();
188 return bestIndex_->veclen();
196 return bestIndex_->usedMemory();
211 float searchTimeCost;
218 void evaluate_kmeans(CostData&
cost)
224 Logger::info(
"KMeansTree using params: max_iterations=%d, branching=%d\n",
225 get_param<int>(cost.params,
"iterations"),
226 get_param<int>(cost.params,
"branching"));
227 KMeansIndex<Distance>
kmeans(sampledDataset_, cost.params, distance_);
232 float buildTime = (
float)t.value;
235 float searchTime =
test_index_precision(
kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
237 float datasetMemory =
float(sampledDataset_.
rows * sampledDataset_.
cols *
sizeof(
float));
238 cost.memoryCost = (
kmeans.usedMemory() + datasetMemory) / datasetMemory;
239 cost.searchTimeCost = searchTime;
240 cost.buildTimeCost = buildTime;
241 Logger::info(
"KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
245 void evaluate_kdtree(CostData& cost)
251 Logger::info(
"KDTree using params: trees=%d\n", get_param<int>(cost.params,
"trees"));
252 KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
257 float buildTime = (
float)t.value;
260 float searchTime =
test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
262 float datasetMemory =
float(sampledDataset_.
rows * sampledDataset_.
cols *
sizeof(
float));
263 cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
264 cost.searchTimeCost = searchTime;
265 cost.buildTimeCost = buildTime;
266 Logger::info(
"KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
318 void optimizeKMeans(std::vector<CostData>& costs)
320 Logger::info(
"KMEANS, Step 1: Exploring parameter space\n");
323 int maxIterations[] = { 1, 5, 10, 15 };
324 int branchingFactors[] = { 16, 32, 64, 128, 256 };
326 int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
327 costs.reserve(costs.size() + kmeansParamSpaceSize);
330 for (
size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
331 for (
size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
335 cost.params[
"iterations"] = maxIterations[i];
336 cost.params[
"branching"] = branchingFactors[j];
338 evaluate_kmeans(cost);
339 costs.push_back(cost);
366 void optimizeKDTree(std::vector<CostData>& costs)
368 Logger::info(
"KD-TREE, Step 1: Exploring parameter space\n");
371 int testTrees[] = { 1, 4, 8, 16, 32 };
374 for (
size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
376 cost.params[
"trees"] = testTrees[i];
378 evaluate_kdtree(cost);
379 costs.push_back(cost);
409 std::vector<CostData> costs;
411 int sampleSize =
int(sample_fraction_ * dataset_.
rows);
412 int testSampleSize =
std::min(sampleSize / 10, 1000);
414 Logger::info(
"Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.
rows, sampleSize, testSampleSize, target_precision_);
418 if (testSampleSize < 10) {
419 Logger::info(
"Choosing linear, dataset too small\n");
420 return LinearIndexParams();
426 testDataset_ =
random_sample(sampledDataset_, testSampleSize,
true);
429 Logger::info(
"Computing ground truth... \n");
430 gt_matches_ = Matrix<int>(
new int[testDataset_.
rows], testDataset_.
rows, 1);
433 compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
436 CostData linear_cost;
437 linear_cost.searchTimeCost = (
float)t.value;
438 linear_cost.buildTimeCost = 0;
439 linear_cost.memoryCost = 0;
442 costs.push_back(linear_cost);
445 Logger::info(
"Autotuning parameters...\n");
447 optimizeKMeans(costs);
448 optimizeKDTree(costs);
450 float bestTimeCost = costs[0].searchTimeCost;
451 for (
size_t i = 0; i < costs.size(); ++i) {
452 float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
453 if (timeCost < bestTimeCost) {
454 bestTimeCost = timeCost;
458 float bestCost = costs[0].searchTimeCost / bestTimeCost;
460 if (bestTimeCost > 0) {
461 for (
size_t i = 0; i < costs.size(); ++i) {
462 float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
463 memory_weight_ * costs[i].memoryCost;
464 if (crtCost < bestCost) {
466 bestParams = costs[i].params;
471 delete[] gt_matches_.data;
472 delete[] testDataset_.data;
473 delete[] sampledDataset_.data;
485 float estimateSearchParams(SearchParams& searchParams)
488 const size_t SAMPLE_COUNT = 1000;
490 assert(bestIndex_ != NULL);
496 Matrix<ElementType> testDataset =
random_sample(dataset_, samples);
498 Logger::info(
"Computing ground truth\n");
501 Matrix<int> gt_matches(
new int[testDataset.rows], testDataset.rows, 1);
504 compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
506 float linear = (
float)t.value;
509 Logger::info(
"Estimating number of checks\n");
514 Logger::info(
"KMeans algorithm, estimating cluster border factor\n");
515 KMeansIndex<Distance>*
kmeans = (KMeansIndex<Distance>*)bestIndex_;
516 float bestSearchTime = -1;
517 float best_cb_index = -1;
518 int best_checks = -1;
519 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
520 kmeans->set_cb_index(cb_index);
521 searchTime =
test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
522 if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
523 bestSearchTime = searchTime;
524 best_cb_index = cb_index;
525 best_checks = checks;
528 searchTime = bestSearchTime;
529 cb_index = best_cb_index;
530 checks = best_checks;
532 kmeans->set_cb_index(best_cb_index);
533 Logger::info(
"Optimum cb_index: %g\n", cb_index);
534 bestParams_[
"cb_index"] = cb_index;
537 searchTime =
test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
540 Logger::info(
"Required number of checks: %d \n", checks);
541 searchParams[
"checks"] = checks;
543 speedup = linear / searchTime;
545 delete[] gt_matches.data;
546 delete[] testDataset.data;
553 NNIndex<Distance>* bestIndex_;
556 SearchParams bestSearchParams_;
558 Matrix<ElementType> sampledDataset_;
559 Matrix<ElementType> testDataset_;
560 Matrix<int> gt_matches_;
567 const Matrix<ElementType> dataset_;
572 float target_precision_;
574 float memory_weight_;
575 float sample_fraction_;
const CvArr CvSeq CvSeq CvMemStorage CvSURFParams params
Definition: compat.hpp:647
flann_algorithm_t
Definition: defines.h:81
Definition: defines.h:171
T get_param(const IndexParams ¶ms, std::string name, const T &default_value)
Definition: params.h:59
size_t cols
Definition: matrix.h:52
Distance::ResultType DistanceType
Definition: autotuned_index.h:74
virtual void saveIndex(FILE *stream)
Definition: autotuned_index.h:120
AutotunedIndex(const Matrix< ElementType > &inputData, const IndexParams ¶ms=AutotunedIndexParams(), Distance d=Distance())
Definition: autotuned_index.h:76
IndexParams getParameters() const
Definition: autotuned_index.h:159
virtual size_t size() const
Definition: autotuned_index.h:178
void print_params(const IndexParams ¶ms)
Definition: params.h:82
int d
Definition: legacy.hpp:3064
Matrix< T > random_sample(Matrix< T > &srcMatrix, long size, bool remove=false)
Definition: sampling.h:40
CV_EXPORTS_W double kmeans(InputArray data, int K, CV_OUT InputOutputArray bestLabels, TermCriteria criteria, int attempts, int flags, OutputArray centers=noArray())
clusters the input data using k-Means algorithm
const CvArr const CvArr CvArr * result
Definition: core_c.h:805
virtual void loadIndex(FILE *stream)
Definition: autotuned_index.h:130
float getSpeedup() const
Definition: autotuned_index.h:169
virtual void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams)
Definition: autotuned_index.h:147
AutotunedIndex & operator=(const AutotunedIndex &)
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
computes per-element minimum of two arrays (dst = min(src1, src2))
Definition: autotuned_index.h:52
virtual ~AutotunedIndex()
Definition: autotuned_index.h:89
void load_value(FILE *stream, T &value, size_t count=1)
Definition: saving.h:147
Definition: autotuned_index.h:70
Distance::ElementType ElementType
Definition: autotuned_index.h:73
SearchParams getSearchParameters() const
Definition: autotuned_index.h:164
Definition: result_set.h:66
std::map< std::string, any > IndexParams
Definition: params.h:42
GLenum const GLfloat * params
Definition: compat.hpp:688
float test_index_precision(NNIndex< Distance > &index, const Matrix< typename Distance::ElementType > &inputData, const Matrix< typename Distance::ElementType > &testData, const Matrix< int > &matches, float precision, int &checks, const Distance &distance, int nn=1, int skipMatches=0)
Definition: index_testing.h:154
Definition: nn_index.h:48
size_t rows
Definition: matrix.h:51
::max::max::max float
Definition: functional.hpp:326
virtual flann_algorithm_t getType() const
Definition: autotuned_index.h:202
::max::max int
Definition: functional.hpp:324
virtual void buildIndex()
Definition: autotuned_index.h:100
const CvArr * cost
Definition: calib3d.hpp:359
Definition: defines.h:107
void save_value(FILE *stream, const T &value, size_t count=1)
Definition: saving.h:126
virtual size_t veclen() const
Definition: autotuned_index.h:186
NNIndex< Distance > * create_index_by_type(const Matrix< typename Distance::ElementType > &dataset, const IndexParams ¶ms, const Distance &distance)
Definition: all_indices.h:146
virtual int usedMemory() const
Definition: autotuned_index.h:194
AutotunedIndexParams(float target_precision=0.8, float build_weight=0.01, float memory_weight=0, float sample_fraction=0.1)
Definition: autotuned_index.h:54