00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
00031 #define OPENCV_FLANN_AUTOTUNED_INDEX_H_
00032
00033 #include "general.h"
00034 #include "nn_index.h"
00035 #include "ground_truth.h"
00036 #include "index_testing.h"
00037 #include "sampling.h"
00038 #include "kdtree_index.h"
00039 #include "kdtree_single_index.h"
00040 #include "kmeans_index.h"
00041 #include "composite_index.h"
00042 #include "linear_index.h"
00043 #include "logger.h"
00044
00045 namespace cvflann
00046 {
00047
// Forward declaration of the index factory: instantiates the concrete index
// implementation selected by params["algorithm"] (definition lives elsewhere
// in the library -- presumably all_indices.h / flann_base.hpp; TODO confirm).
template<typename Distance>
NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);
00050
00051
00052 struct AutotunedIndexParams : public IndexParams
00053 {
00054 AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
00055 {
00056 (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
00057
00058 (*this)["target_precision"] = target_precision;
00059
00060 (*this)["build_weight"] = build_weight;
00061
00062 (*this)["memory_weight"] = memory_weight;
00063
00064 (*this)["sample_fraction"] = sample_fraction;
00065 }
00066 };
00067
00068
/**
 * Nearest-neighbour index that automatically selects the best index type
 * (linear scan, kd-tree forest or k-means tree) and its parameters for the
 * given dataset, by benchmarking candidate configurations on a random sample
 * of the data. The tuning balances search time, build time and memory usage
 * according to the weights supplied in AutotunedIndexParams.
 */
template <typename Distance>
class AutotunedIndex : public NNIndex<Distance>
{
public:
    typedef typename Distance::ElementType ElementType;
    typedef typename Distance::ResultType DistanceType;

    /**
     * Constructor. Stores a reference to the dataset and reads the tuning
     * weights from params; the inner index is only created later, by
     * buildIndex() or loadIndex().
     *
     * @param inputData matrix containing the dataset points (not copied here)
     * @param params    autotuning parameters (see AutotunedIndexParams)
     * @param d         distance functor used for all comparisons
     */
    AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
        dataset_(inputData), distance_(d)
    {
        target_precision_ = get_param(params, "target_precision",0.8f);
        build_weight_ = get_param(params,"build_weight", 0.01f);
        memory_weight_ = get_param(params, "memory_weight", 0.0f);
        sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
        bestIndex_ = NULL;  // created by buildIndex()/loadIndex()
    }

    // Copying is disabled: declared without a definition in this header, so
    // any attempted copy fails at link time (pre-C++11 idiom). The index owns
    // bestIndex_ and must not be shallow-copied.
    AutotunedIndex(const AutotunedIndex&);
    AutotunedIndex& operator=(const AutotunedIndex&);

    /** Destructor: releases the tuned inner index, if one was built/loaded. */
    virtual ~AutotunedIndex()
    {
        if (bestIndex_ != NULL) {
            delete bestIndex_;
            bestIndex_ = NULL;
        }
    }

    /**
     * Runs the autotuning procedure, builds the winning index type with the
     * tuned parameters, then estimates the search parameters ("checks")
     * needed to reach the target precision, storing the measured speedup
     * over linear search in speedup_.
     *
     * NOTE(review): calling buildIndex() a second time would leak the
     * previously created bestIndex_ -- it is overwritten without a delete.
     */
    virtual void buildIndex()
    {
        bestParams_ = estimateBuildParams();
        Logger::info("----------------------------------------------------\n");
        Logger::info("Autotuned parameters:\n");
        print_params(bestParams_);
        Logger::info("----------------------------------------------------\n");

        bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
        bestIndex_->buildIndex();
        speedup_ = estimateSearchParams(bestSearchParams_);
        Logger::info("----------------------------------------------------\n");
        Logger::info("Search parameters:\n");
        print_params(bestSearchParams_);
        Logger::info("----------------------------------------------------\n");
    }

    /**
     * Saves the tuned index to a stream: first the inner index type tag,
     * then the inner index itself, then the tuned number of checks.
     * Assumes buildIndex()/loadIndex() has already run (bestIndex_ non-NULL).
     *
     * @param stream output FILE stream
     */
    virtual void saveIndex(FILE* stream)
    {
        save_value(stream, (int)bestIndex_->getType());
        bestIndex_->saveIndex(stream);
        save_value(stream, get_param<int>(bestSearchParams_, "checks"));
    }

    /**
     * Loads a previously saved index from a stream, mirroring saveIndex():
     * reads the inner index type, re-creates and loads the inner index, then
     * restores the tuned number of checks.
     *
     * @param stream input FILE stream
     */
    virtual void loadIndex(FILE* stream)
    {
        int index_type;

        load_value(stream, index_type);
        IndexParams params;
        params["algorithm"] = (flann_algorithm_t)index_type;
        bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
        bestIndex_->loadIndex(stream);
        int checks;
        load_value(stream, checks);
        bestSearchParams_["checks"] = checks;
    }

    /**
     * Finds the nearest neighbours of a query point by delegating to the
     * tuned inner index. If the caller passes checks == FLANN_CHECKS_AUTOTUNED
     * the tuned search parameters are used; otherwise the caller-supplied
     * parameters are forwarded unchanged.
     *
     * @param result       result set collecting the neighbours found
     * @param vec          query point
     * @param searchParams search parameters (notably "checks")
     */
    virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
    {
        int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
        if (checks == FLANN_CHECKS_AUTOTUNED) {
            bestIndex_->findNeighbors(result, vec, bestSearchParams_);
        }
        else {
            bestIndex_->findNeighbors(result, vec, searchParams);
        }
    }

    /** Returns the parameters of the tuned inner index. */
    IndexParams getParameters() const
    {
        return bestIndex_->getParameters();
    }

    /** Returns the search parameters determined during autotuning. */
    SearchParams getSearchParameters() const
    {
        return bestSearchParams_;
    }

    /** Returns the estimated speedup over linear search (set by buildIndex()). */
    float getSpeedup() const
    {
        return speedup_;
    }

    /** Number of points in the inner index. */
    virtual size_t size() const
    {
        return bestIndex_->size();
    }

    /** Length (dimensionality) of each vector in the inner index. */
    virtual size_t veclen() const
    {
        return bestIndex_->veclen();
    }

    /** Memory (in bytes) used by the inner index. */
    virtual int usedMemory() const
    {
        return bestIndex_->usedMemory();
    }

    /** Algorithm tag for this index type. */
    virtual flann_algorithm_t getType() const
    {
        return FLANN_INDEX_AUTOTUNED;
    }

private:

    /** Benchmark results for one candidate index configuration. */
    struct CostData
    {
        float searchTimeCost;  // time to reach target precision on the test set
        float buildTimeCost;   // time to build the candidate index
        float memoryCost;      // (index memory + dataset memory) / dataset memory
        float totalCost;       // NOTE(review): never assigned in this file -- left uninitialized
        IndexParams params;    // parameters that produced these costs
    };

    /**
     * Builds a k-means tree on the sampled dataset with cost.params and fills
     * in cost's build time, search time (to reach target precision) and
     * relative memory cost.
     */
    void evaluate_kmeans(CostData& cost)
    {
        StartStopTimer t;
        int checks;
        const int nn = 1;

        Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
                     get_param<int>(cost.params,"iterations"),
                     get_param<int>(cost.params,"branching"));
        KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);

        // measure build time
        t.start();
        kmeans.buildIndex();
        t.stop();
        float buildTime = (float)t.value;

        // measure search time needed to reach the target precision
        float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);

        // memory cost is measured relative to the dataset's own footprint
        float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
        cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
        cost.searchTimeCost = searchTime;
        cost.buildTimeCost = buildTime;
        Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
    }

    /**
     * Builds a kd-tree forest on the sampled dataset with cost.params and
     * fills in cost's build time, search time and relative memory cost.
     */
    void evaluate_kdtree(CostData& cost)
    {
        StartStopTimer t;
        int checks;
        const int nn = 1;

        Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
        KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);

        // measure build time
        t.start();
        kdtree.buildIndex();
        t.stop();
        float buildTime = (float)t.value;

        // measure search time needed to reach the target precision
        float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);

        // memory cost is measured relative to the dataset's own footprint
        float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
        cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
        cost.searchTimeCost = searchTime;
        cost.buildTimeCost = buildTime;
        Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
    }

    /**
     * Explores the k-means tree parameter grid (iterations x branching) on
     * the sampled dataset, appending one CostData entry per configuration.
     */
    void optimizeKMeans(std::vector<CostData>& costs)
    {
        Logger::info("KMEANS, Step 1: Exploring parameter space\n");

        // candidate parameter grid
        int maxIterations[] = { 1, 5, 10, 15 };
        int branchingFactors[] = { 16, 32, 64, 128, 256 };

        int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
        costs.reserve(costs.size() + kmeansParamSpaceSize);

        // evaluate the full grid
        for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
            for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
                CostData cost;
                cost.params["algorithm"] = FLANN_INDEX_KMEANS;
                cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
                cost.params["iterations"] = maxIterations[i];
                cost.params["branching"] = branchingFactors[j];

                evaluate_kmeans(cost);
                costs.push_back(cost);
            }
        }
    }

    /**
     * Explores the kd-tree parameter space (number of trees) on the sampled
     * dataset, appending one CostData entry per configuration.
     *
     * NOTE(review): unlike optimizeKMeans(), no "algorithm" key is set on
     * cost.params here -- confirm create_index_by_type() copes when a kd-tree
     * configuration is selected as the best one.
     */
    void optimizeKDTree(std::vector<CostData>& costs)
    {
        Logger::info("KD-TREE, Step 1: Exploring parameter space\n");

        // candidate tree counts
        int testTrees[] = { 1, 4, 8, 16, 32 };

        // evaluate each candidate
        for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
            CostData cost;
            cost.params["trees"] = testTrees[i];

            evaluate_kdtree(cost);
            costs.push_back(cost);
        }
    }

    /**
     * Chooses the best index type and parameters for the dataset.
     *
     * Benchmarks linear search, k-means trees and kd-trees on a random
     * sample (sample_fraction_ of the dataset) against a smaller test sample,
     * then picks the configuration minimizing
     *   (build_weight_*buildTime + searchTime)/bestTimeCost
     *     + memory_weight_*memoryCost.
     * Falls back to a linear index when the dataset is too small to sample.
     *
     * @return the IndexParams of the winning configuration
     */
    IndexParams estimateBuildParams()
    {
        std::vector<CostData> costs;

        int sampleSize = int(sample_fraction_ * dataset_.rows);
        int testSampleSize = std::min(sampleSize / 10, 1000);

        Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);

        // tiny datasets: tuning is pointless, linear search wins
        if (testSampleSize < 10) {
            Logger::info("Choosing linear, dataset too small\n");
            return LinearIndexParams();
        }

        // tuning sample of the dataset
        sampledDataset_ = random_sample(dataset_, sampleSize);
        // query sample, removed from sampledDataset_ (hence skip=0 below)
        testDataset_ = random_sample(sampledDataset_, testSampleSize, true);

        // exact neighbours for the test queries; the elapsed time doubles as
        // the linear-search cost baseline
        Logger::info("Computing ground truth... \n");
        gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
        StartStopTimer t;
        t.start();
        compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
        t.stop();

        CostData linear_cost;
        linear_cost.searchTimeCost = (float)t.value;
        linear_cost.buildTimeCost = 0;
        linear_cost.memoryCost = 0;
        linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;

        costs.push_back(linear_cost);

        // explore the candidate configurations
        Logger::info("Autotuning parameters...\n");

        optimizeKMeans(costs);
        optimizeKDTree(costs);

        // smallest weighted time cost, used to normalize the scores below
        float bestTimeCost = costs[0].searchTimeCost;
        for (size_t i = 0; i < costs.size(); ++i) {
            float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
            if (timeCost < bestTimeCost) {
                bestTimeCost = timeCost;
            }
        }

        // NOTE(review): this division happens before the bestTimeCost > 0
        // guard -- a zero bestTimeCost yields inf here (costs[0] then wins).
        float bestCost = costs[0].searchTimeCost / bestTimeCost;
        IndexParams bestParams = costs[0].params;
        if (bestTimeCost > 0) {
            for (size_t i = 0; i < costs.size(); ++i) {
                float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
                                memory_weight_ * costs[i].memoryCost;
                if (crtCost < bestCost) {
                    bestCost = crtCost;
                    bestParams = costs[i].params;
                }
            }
        }

        // free the tuning buffers allocated above
        delete[] gt_matches_.data;
        delete[] testDataset_.data;
        delete[] sampledDataset_.data;

        return bestParams;
    }

    /**
     * Estimates the search-time parameters ("checks", and for k-means trees
     * the cluster-boundary factor cb_index) required to reach the target
     * precision on the full dataset, and measures the speedup over linear
     * search.
     *
     * @param searchParams [out] receives the tuned "checks" value
     * @return estimated speedup over linear search (0 if the dataset is too
     *         small to sample)
     */
    float estimateSearchParams(SearchParams& searchParams)
    {
        const int nn = 1;
        const size_t SAMPLE_COUNT = 1000;

        assert(bestIndex_ != NULL); // must be called after buildIndex()

        float speedup = 0;

        int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
        if (samples > 0) {
            Matrix<ElementType> testDataset = random_sample(dataset_, samples);

            Logger::info("Computing ground truth\n");

            // test points stay in dataset_ here, so skip=1 drops the
            // self-match; elapsed time is the linear-search baseline
            Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
            StartStopTimer t;
            t.start();
            compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
            t.stop();
            float linear = (float)t.value;

            int checks;
            Logger::info("Estimating number of checks\n");

            float searchTime;
            float cb_index;
            if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
                Logger::info("KMeans algorithm, estimating cluster border factor\n");
                KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
                float bestSearchTime = -1;
                float best_cb_index = -1;
                int best_checks = -1;
                // try cb_index = 0, 0.2, ..., 1.0 and keep the fastest
                for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
                    kmeans->set_cb_index(cb_index);
                    searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
                    if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
                        bestSearchTime = searchTime;
                        best_cb_index = cb_index;
                        best_checks = checks;
                    }
                }
                searchTime = bestSearchTime;
                cb_index = best_cb_index;
                checks = best_checks;

                // leave the index configured with the winning factor
                kmeans->set_cb_index(best_cb_index);
                Logger::info("Optimum cb_index: %g\n", cb_index);
                bestParams_["cb_index"] = cb_index;
            }
            else {
                searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
            }

            Logger::info("Required number of checks: %d \n", checks);
            searchParams["checks"] = checks;

            speedup = linear / searchTime;

            delete[] gt_matches.data;
            delete[] testDataset.data;
        }

        return speedup;
    }

private:
    // tuned inner index (owned); NULL until buildIndex()/loadIndex()
    NNIndex<Distance>* bestIndex_;

    IndexParams bestParams_;         // parameters of the winning configuration
    SearchParams bestSearchParams_;  // tuned search parameters ("checks")

    // scratch buffers used only during estimateBuildParams()
    Matrix<ElementType> sampledDataset_;
    Matrix<ElementType> testDataset_;
    Matrix<int> gt_matches_;

    float speedup_;  // measured speedup over linear search

    // the dataset the index is built on (referenced, not owned)
    const Matrix<ElementType> dataset_;

    // tuning weights, read from the constructor's params
    float target_precision_;
    float build_weight_;
    float memory_weight_;
    float sample_fraction_;

    Distance distance_;  // distance functor


};
00581 }
00582
00583 #endif