00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #ifndef _OPENCV_AUTOTUNEDINDEX_H_
00032 #define _OPENCV_AUTOTUNEDINDEX_H_
00033
00034 #include "opencv2/flann/general.h"
00035 #include "opencv2/flann/nn_index.h"
00036 #include "opencv2/flann/ground_truth.h"
00037 #include "opencv2/flann/index_testing.h"
00038 #include "opencv2/flann/sampling.h"
00039 #include "opencv2/flann/all_indices.h"
00040
00041 namespace cvflann
00042 {
00043
00044 struct AutotunedIndexParams : public IndexParams {
00045 AutotunedIndexParams( float target_precision_ = 0.8, float build_weight_ = 0.01,
00046 float memory_weight_ = 0, float sample_fraction_ = 0.1) :
00047 IndexParams(FLANN_INDEX_AUTOTUNED),
00048 target_precision(target_precision_),
00049 build_weight(build_weight_),
00050 memory_weight(memory_weight_),
00051 sample_fraction(sample_fraction_) {};
00052
00053 float target_precision;
00054 float build_weight;
00055 float memory_weight;
00056 float sample_fraction;
00057
00058 void print() const
00059 {
00060 logger().info("Index type: %d\n",(int)algorithm);
00061 logger().info("logger(). precision: %g\n", target_precision);
00062 logger().info("Build weight: %g\n", build_weight);
00063 logger().info("Memory weight: %g\n", memory_weight);
00064 logger().info("Sample fraction: %g\n", sample_fraction);
00065 }
00066 };
00067
00068
00069 template <typename ELEM_TYPE, typename DIST_TYPE = typename DistType<ELEM_TYPE>::type >
00070 class AutotunedIndex : public NNIndex<ELEM_TYPE>
00071 {
00072 NNIndex<ELEM_TYPE>* bestIndex;
00073
00074 IndexParams* bestParams;
00075 SearchParams bestSearchParams;
00076
00077 Matrix<ELEM_TYPE> sampledDataset;
00078 Matrix<ELEM_TYPE> testDataset;
00079 Matrix<int> gt_matches;
00080
00081 float speedup;
00082
00086 const Matrix<ELEM_TYPE> dataset;
00087
00091 const AutotunedIndexParams& index_params;
00092
00093 AutotunedIndex& operator=(const AutotunedIndex&);
00094 AutotunedIndex(const AutotunedIndex&);
00095
00096 public:
00097
00098 AutotunedIndex(const Matrix<ELEM_TYPE>& inputData, const AutotunedIndexParams& params = AutotunedIndexParams() ) :
00099 dataset(inputData), index_params(params)
00100 {
00101 bestIndex = NULL;
00102 bestParams = NULL;
00103 }
00104
00105 virtual ~AutotunedIndex()
00106 {
00107 if (bestIndex!=NULL) {
00108 delete bestIndex;
00109 }
00110 if (bestParams!=NULL) {
00111 delete bestParams;
00112 }
00113 };
00114
00118 virtual void buildIndex()
00119 {
00120 bestParams = estimateBuildParams();
00121 logger().info("----------------------------------------------------\n");
00122 logger().info("Autotuned parameters:\n");
00123 bestParams->print();
00124 logger().info("----------------------------------------------------\n");
00125 flann_algorithm_t index_type = bestParams->getIndexType();
00126 switch (index_type) {
00127 case FLANN_INDEX_LINEAR:
00128 bestIndex = new LinearIndex<ELEM_TYPE>(dataset, (const LinearIndexParams&)*bestParams);
00129 break;
00130 case FLANN_INDEX_KDTREE:
00131 bestIndex = new KDTreeIndex<ELEM_TYPE>(dataset, (const KDTreeIndexParams&)*bestParams);
00132 break;
00133 case FLANN_INDEX_KMEANS:
00134 bestIndex = new KMeansIndex<ELEM_TYPE>(dataset, (const KMeansIndexParams&)*bestParams);
00135 break;
00136 default:
00137 throw FLANNException("Unknown algorithm choosen by the autotuning, most likely a bug.");
00138 }
00139 bestIndex->buildIndex();
00140 speedup = estimateSearchParams(bestSearchParams);
00141 }
00142
00146 virtual void saveIndex(FILE* stream)
00147 {
00148 save_value(stream, (int)bestIndex->getType());
00149 bestIndex->saveIndex(stream);
00150 save_value(stream, bestSearchParams);
00151 }
00152
00156 virtual void loadIndex(FILE* stream)
00157 {
00158 int index_type;
00159 load_value(stream,index_type);
00160 IndexParams* params = ParamsFactory_instance().create((flann_algorithm_t)index_type);
00161 bestIndex = create_index_by_type(dataset, *params);
00162 bestIndex->loadIndex(stream);
00163 load_value(stream, bestSearchParams);
00164 }
00165
00169 virtual void findNeighbors(ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, const SearchParams& searchParams)
00170 {
00171 if (searchParams.checks==-2) {
00172 bestIndex->findNeighbors(result, vec, bestSearchParams);
00173 }
00174 else {
00175 bestIndex->findNeighbors(result, vec, searchParams);
00176 }
00177 }
00178
00179
00180 const IndexParams* getParameters() const
00181 {
00182 return bestIndex->getParameters();
00183 }
00184
00185
00189 virtual size_t size() const
00190 {
00191 return bestIndex->size();
00192 }
00193
00197 virtual size_t veclen() const
00198 {
00199 return bestIndex->veclen();
00200 }
00201
00205 virtual int usedMemory() const
00206 {
00207 return bestIndex->usedMemory();
00208 }
00209
00213 virtual flann_algorithm_t getType() const
00214 {
00215 return FLANN_INDEX_AUTOTUNED;
00216 }
00217
00218 private:
00219
00220 struct CostData {
00221 float searchTimeCost;
00222 float buildTimeCost;
00223 float timeCost;
00224 float memoryCost;
00225 float totalCost;
00226 };
00227
00228 typedef std::pair<CostData, KDTreeIndexParams> KDTreeCostData;
00229 typedef std::pair<CostData, KMeansIndexParams> KMeansCostData;
00230
00231
00232 void evaluate_kmeans(CostData& cost, const KMeansIndexParams& kmeans_params)
00233 {
00234 StartStopTimer t;
00235 int checks;
00236 const int nn = 1;
00237
00238 logger().info("KMeansTree using params: max_iterations=%d, branching=%d\n", kmeans_params.iterations, kmeans_params.branching);
00239 KMeansIndex<ELEM_TYPE> kmeans(sampledDataset, kmeans_params);
00240
00241 t.start();
00242 kmeans.buildIndex();
00243 t.stop();
00244 float buildTime = (float)t.value;
00245
00246
00247 float searchTime = test_index_precision(kmeans, sampledDataset, testDataset, gt_matches, index_params.target_precision, checks, nn);;
00248
00249 float datasetMemory = (float)(sampledDataset.rows*sampledDataset.cols*sizeof(float));
00250 cost.memoryCost = (kmeans.usedMemory()+datasetMemory)/datasetMemory;
00251 cost.searchTimeCost = searchTime;
00252 cost.buildTimeCost = buildTime;
00253 cost.timeCost = (buildTime*index_params.build_weight+searchTime);
00254 logger().info("KMeansTree buildTime=%g, searchTime=%g, timeCost=%g, buildTimeFactor=%g\n",buildTime, searchTime, cost.timeCost, index_params.build_weight);
00255 }
00256
00257
00258 void evaluate_kdtree(CostData& cost, const KDTreeIndexParams& kdtree_params)
00259 {
00260 StartStopTimer t;
00261 int checks;
00262 const int nn = 1;
00263
00264 logger().info("KDTree using params: trees=%d\n",kdtree_params.trees);
00265 KDTreeIndex<ELEM_TYPE> kdtree(sampledDataset, kdtree_params);
00266
00267 t.start();
00268 kdtree.buildIndex();
00269 t.stop();
00270 float buildTime = (float)t.value;
00271
00272
00273 float searchTime = test_index_precision(kdtree, sampledDataset, testDataset, gt_matches, index_params.target_precision, checks, nn);
00274
00275 float datasetMemory = (float)(sampledDataset.rows*sampledDataset.cols*sizeof(float));
00276 cost.memoryCost = (kdtree.usedMemory()+datasetMemory)/datasetMemory;
00277 cost.searchTimeCost = searchTime;
00278 cost.buildTimeCost = buildTime;
00279 cost.timeCost = (buildTime*index_params.build_weight+searchTime);
00280 logger().info("KDTree buildTime=%g, searchTime=%g, timeCost=%g\n",buildTime, searchTime, cost.timeCost);
00281 }
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332 KMeansCostData optimizeKMeans()
00333 {
00334 logger().info("KMEANS, Step 1: Exploring parameter space\n");
00335
00336
00337 int maxIterations[] = { 1, 5, 10, 15 };
00338 int branchingFactors[] = { 16, 32, 64, 128, 256 };
00339
00340 int kmeansParamSpaceSize = ARRAY_LEN(maxIterations)*ARRAY_LEN(branchingFactors);
00341
00342 std::vector<KMeansCostData> kmeansCosts(kmeansParamSpaceSize);
00343
00344
00345
00346
00347 int cnt = 0;
00348 for (size_t i=0; i<ARRAY_LEN(maxIterations); ++i) {
00349 for (size_t j=0; j<ARRAY_LEN(branchingFactors); ++j) {
00350
00351 kmeansCosts[cnt].second.centers_init = FLANN_CENTERS_RANDOM;
00352 kmeansCosts[cnt].second.iterations = maxIterations[i];
00353 kmeansCosts[cnt].second.branching = branchingFactors[j];
00354
00355 evaluate_kmeans(kmeansCosts[cnt].first, kmeansCosts[cnt].second);
00356
00357 int k = cnt;
00358
00359 while (k>0 && kmeansCosts[k].first.timeCost < kmeansCosts[k-1].first.timeCost) {
00360 swap(kmeansCosts[k],kmeansCosts[k-1]);
00361 --k;
00362 }
00363 ++cnt;
00364 }
00365 }
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388 float optTimeCost = kmeansCosts[0].first.timeCost;
00389
00390 for (int i=0;i<kmeansParamSpaceSize;++i) {
00391 kmeansCosts[i].first.totalCost = (kmeansCosts[i].first.timeCost/optTimeCost + index_params.memory_weight * kmeansCosts[i].first.memoryCost);
00392
00393 int k = i;
00394 while (k>0 && kmeansCosts[k].first.totalCost < kmeansCosts[k-1].first.totalCost) {
00395 swap(kmeansCosts[k],kmeansCosts[k-1]);
00396 k--;
00397 }
00398 }
00399
00400 for (int i=0;i<kmeansParamSpaceSize;++i) {
00401 logger().info("KMeans, branching=%d, iterations=%d, time_cost=%g[%g] (build=%g, search=%g), memory_cost=%g, cost=%g\n",
00402 kmeansCosts[i].second.branching, kmeansCosts[i].second.iterations,
00403 kmeansCosts[i].first.timeCost,kmeansCosts[i].first.timeCost/optTimeCost,
00404 kmeansCosts[i].first.buildTimeCost, kmeansCosts[i].first.searchTimeCost,
00405 kmeansCosts[i].first.memoryCost,kmeansCosts[i].first.totalCost);
00406 }
00407
00408 return kmeansCosts[0];
00409 }
00410
00411
00412 KDTreeCostData optimizeKDTree()
00413 {
00414
00415 logger().info("KD-TREE, Step 1: Exploring parameter space\n");
00416
00417
00418 int testTrees[] = { 1, 4, 8, 16, 32 };
00419
00420 size_t kdtreeParamSpaceSize = ARRAY_LEN(testTrees);
00421 std::vector<KDTreeCostData> kdtreeCosts(kdtreeParamSpaceSize);
00422
00423
00424 int cnt = 0;
00425 for (size_t i=0; i<ARRAY_LEN(testTrees); ++i) {
00426 kdtreeCosts[cnt].second.trees = testTrees[i];
00427
00428 evaluate_kdtree(kdtreeCosts[cnt].first, kdtreeCosts[cnt].second);
00429
00430 int k = cnt;
00431
00432 while (k>0 && kdtreeCosts[k].first.timeCost < kdtreeCosts[k-1].first.timeCost) {
00433 swap(kdtreeCosts[k],kdtreeCosts[k-1]);
00434 --k;
00435 }
00436 ++cnt;
00437 }
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458 float optTimeCost = kdtreeCosts[0].first.timeCost;
00459
00460 for (size_t i=0;i<kdtreeParamSpaceSize;++i) {
00461 kdtreeCosts[i].first.totalCost = (kdtreeCosts[i].first.timeCost/optTimeCost + index_params.memory_weight * kdtreeCosts[i].first.memoryCost);
00462
00463 int k = (int)i;
00464 while (k>0 && kdtreeCosts[k].first.totalCost < kdtreeCosts[k-1].first.totalCost) {
00465 swap(kdtreeCosts[k],kdtreeCosts[k-1]);
00466 k--;
00467 }
00468 }
00469
00470 for (size_t i=0;i<kdtreeParamSpaceSize;++i) {
00471 logger().info("kd-tree, trees=%d, time_cost=%g[%g] (build=%g, search=%g), memory_cost=%g, cost=%g\n",
00472 kdtreeCosts[i].second.trees,kdtreeCosts[i].first.timeCost,kdtreeCosts[i].first.timeCost/optTimeCost,
00473 kdtreeCosts[i].first.buildTimeCost, kdtreeCosts[i].first.searchTimeCost,
00474 kdtreeCosts[i].first.memoryCost,kdtreeCosts[i].first.totalCost);
00475 }
00476
00477 return kdtreeCosts[0];
00478 }
00479
00485 IndexParams* estimateBuildParams()
00486 {
00487 int sampleSize = int(index_params.sample_fraction*dataset.rows);
00488 int testSampleSize = std::min(sampleSize/10, 1000);
00489
00490 logger().info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d\n",dataset.rows, sampleSize, testSampleSize);
00491
00492
00493
00494 if (testSampleSize<10) {
00495 logger().info("Choosing linear, dataset too small\n");
00496 return new LinearIndexParams();
00497 }
00498
00499
00500 sampledDataset = random_sample(dataset,sampleSize);
00501
00502 testDataset = random_sample(sampledDataset,testSampleSize,true);
00503
00504
00505 logger().info("Computing ground truth... \n");
00506 gt_matches = Matrix<int>(new int[testDataset.rows],(long)testDataset.rows, 1);
00507 StartStopTimer t;
00508 t.start();
00509 compute_ground_truth(sampledDataset, testDataset, gt_matches, 0);
00510 t.stop();
00511 float bestCost = (float)t.value;
00512 IndexParams* bestParams = new LinearIndexParams();
00513
00514
00515 logger().info("Autotuning parameters...\n");
00516
00517
00518 KMeansCostData kmeansCost = optimizeKMeans();
00519 if (kmeansCost.first.totalCost<bestCost) {
00520 bestParams = new KMeansIndexParams(kmeansCost.second);
00521 bestCost = kmeansCost.first.totalCost;
00522 }
00523
00524 KDTreeCostData kdtreeCost = optimizeKDTree();
00525
00526 if (kdtreeCost.first.totalCost<bestCost) {
00527 bestParams = new KDTreeIndexParams(kdtreeCost.second);
00528 bestCost = kdtreeCost.first.totalCost;
00529 }
00530
00531 gt_matches.release();
00532 sampledDataset.release();
00533 testDataset.release();
00534
00535 return bestParams;
00536 }
00537
00538
00539
00545 float estimateSearchParams(SearchParams& searchParams)
00546 {
00547 const int nn = 1;
00548 const size_t SAMPLE_COUNT = 1000;
00549
00550 assert(bestIndex!=NULL);
00551
00552 float speedup = 0;
00553
00554 int samples = (int)std::min(dataset.rows/10, SAMPLE_COUNT);
00555 if (samples>0) {
00556 Matrix<ELEM_TYPE> testDataset = random_sample(dataset,samples);
00557
00558 logger().info("Computing ground truth\n");
00559
00560
00561 Matrix<int> gt_matches(new int[testDataset.rows],(long)testDataset.rows,1);
00562 StartStopTimer t;
00563 t.start();
00564 compute_ground_truth(dataset, testDataset, gt_matches,1);
00565 t.stop();
00566 float linear = (float)t.value;
00567
00568 int checks;
00569 logger().info("Estimating number of checks\n");
00570
00571 float searchTime;
00572 float cb_index;
00573 if (bestIndex->getType() == FLANN_INDEX_KMEANS) {
00574 logger().info("KMeans algorithm, estimating cluster border factor\n");
00575 KMeansIndex<ELEM_TYPE>* kmeans = (KMeansIndex<ELEM_TYPE>*)bestIndex;
00576 float bestSearchTime = -1;
00577 float best_cb_index = -1;
00578 int best_checks = -1;
00579 for (cb_index = 0;cb_index<1.1f; cb_index+=0.2f) {
00580 kmeans->set_cb_index(cb_index);
00581 searchTime = test_index_precision(*kmeans, dataset, testDataset, gt_matches, index_params.target_precision, checks, nn, 1);
00582 if (searchTime<bestSearchTime || bestSearchTime == -1) {
00583 bestSearchTime = searchTime;
00584 best_cb_index = cb_index;
00585 best_checks = checks;
00586 }
00587 }
00588 searchTime = bestSearchTime;
00589 cb_index = best_cb_index;
00590 checks = best_checks;
00591
00592 kmeans->set_cb_index(best_cb_index);
00593 logger().info("Optimum cb_index: %g\n",cb_index);
00594 ((KMeansIndexParams*)bestParams)->cb_index = cb_index;
00595 }
00596 else {
00597 searchTime = test_index_precision(*bestIndex, dataset, testDataset, gt_matches, index_params.target_precision, checks, nn, 1);
00598 }
00599
00600 logger().info("Required number of checks: %d \n",checks);;
00601 searchParams.checks = checks;
00602
00603 speedup = linear/searchTime;
00604
00605 gt_matches.release();
00606 }
00607
00608 return speedup;
00609 }
00610
00611 };
00612
00613 }
00614
00615 #endif