include/opencv2/objdetect/objdetect.hpp
Go to the documentation of this file.
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #ifndef __OPENCV_OBJDETECT_HPP__
00044 #define __OPENCV_OBJDETECT_HPP__
00045 
00046 #include "opencv2/core/core.hpp"
00047 
00048 #ifdef __cplusplus
00049 #include <map>
00050 #include <deque>
00051 
00052 extern "C" {
00053 #endif
00054 
00055 /****************************************************************************************\
00056 *                         Haar-like Object Detection functions                           *
00057 \****************************************************************************************/
00058 
/* Signature value that CV_IS_HAAR_CLASSIFIER expects in the masked `flags` field. */
00059 #define CV_HAAR_MAGIC_VAL    0x42500000
/* Type-name string for Haar classifier cascades.
   NOTE(review): presumably the name under which the type is registered for
   persistence (cvLoad/cvSave) -- confirm in the implementation. */
00060 #define CV_TYPE_NAME_HAAR    "opencv-haar-classifier"
00061 
/* True when `haar` is non-NULL and, after casting it to
   const CvHaarClassifierCascade*, (flags & CV_MAGIC_MASK) == CV_HAAR_MAGIC_VAL.
   The argument is expanded twice, so it must be free of side effects.
   CV_MAGIC_MASK is not defined in the visible part of this header. */
00062 #define CV_IS_HAAR_CLASSIFIER( haar )                                                    \
00063     ((haar) != NULL &&                                                                   \
00064     (((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL)
00065 
/* Size of the `rect` array inside CvHaarFeature. */
00066 #define CV_HAAR_FEATURE_MAX  3
00067 
/* One Haar-like feature: an array of CV_HAAR_FEATURE_MAX (rectangle, weight) pairs. */
00068 typedef struct CvHaarFeature
00069 {
00070     int tilted;          /* NOTE(review): presumably non-zero for rotated ("tilted") features -- confirm */
00071     struct
00072     {
00073         CvRect r;        /* rectangle of this component */
00074         float weight;    /* weight paired with rectangle `r` */
00075     } rect[CV_HAAR_FEATURE_MAX];
00076 } CvHaarFeature;
00077 
/* One classifier inside a cascade stage.
   NOTE(review): the pointer members look like parallel arrays with `count`
   entries each (nodes of a small decision tree) -- confirm against the loader. */
00078 typedef struct CvHaarClassifier
00079 {
00080     int count;                      /* element count (see review note above) */
00081     CvHaarFeature* haar_feature;    /* Haar feature(s) used by this classifier */
00082     float* threshold;               /* threshold value(s) */
00083     int* left;                      /* NOTE(review): presumably left-branch indices -- confirm */
00084     int* right;                     /* NOTE(review): presumably right-branch indices -- confirm */
00085     float* alpha;                   /* NOTE(review): presumably output (leaf) values -- confirm */
00086 } CvHaarClassifier;
00087 
/* One stage of a Haar cascade: a set of classifiers plus a stage threshold.
   NOTE(review): next/child/parent look like stage indices that link stages
   into a tree -- confirm against the implementation. */
00088 typedef struct CvHaarStageClassifier
00089 {
00090     int  count;                     /* NOTE(review): presumably the number of entries in `classifier` */
00091     float threshold;                /* stage threshold */
00092     CvHaarClassifier* classifier;   /* classifiers belonging to this stage */
00093 
00094     int next;                       /* see review note above */
00095     int child;                      /* see review note above */
00096     int parent;                     /* see review note above */
00097 } CvHaarStageClassifier;
00098 
/* Opaque type: only forward-declared here; its definition is not part of this header section. */
00099 typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade;
00100 
/* Top-level descriptor of a Haar classifier cascade. */
00101 typedef struct CvHaarClassifierCascade
00102 {
00103     int  flags;                              /* signature; CV_IS_HAAR_CLASSIFIER tests it against CV_HAAR_MAGIC_VAL */
00104     int  count;                              /* NOTE(review): presumably the number of stages in `stage_classifier` */
00105     CvSize orig_window_size;                 /* NOTE(review): presumably the window size the cascade was trained on */
00106     CvSize real_window_size;                 /* NOTE(review): presumably the current (scaled) window size */
00107     double scale;                            /* NOTE(review): presumably the current scale factor -- confirm */
00108     CvHaarStageClassifier* stage_classifier; /* stage descriptors */
00109     CvHidHaarClassifierCascade* hid_cascade; /* pointer to the opaque internal representation */
00110 } CvHaarClassifierCascade;
00111 
/* A rectangle paired with a neighbor count.
   NOTE(review): presumably the element type of the sequence returned by
   cvHaarDetectObjects -- confirm against the implementation. */
00112 typedef struct CvAvgComp
00113 {
00114     CvRect rect;      /* rectangle */
00115     int neighbors;    /* neighbor count associated with `rect` */
00116 } CvAvgComp;
00117 
00118 /* Loads a Haar classifier cascade from a directory.
00119    Obsolete: convert the cascade to XML and use cvLoad instead.
         directory        - path of the directory holding the cascade
         orig_window_size - window size of the cascade
       Returns a pointer to the loaded cascade.
       NOTE(review): presumably the result must be freed with
       cvReleaseHaarClassifierCascade -- confirm. */
00120 CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade(
00121                     const char* directory, CvSize orig_window_size);
00122 
/* Releases a Haar classifier cascade. Takes the address of the caller's pointer.
   NOTE(review): presumably resets *cascade to NULL after freeing -- confirm. */
00123 CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade );
00124 
/* Detection flags. The values are distinct powers of two, so they can be OR-ed.
   NOTE(review): presumably passed as the `flags` argument of cvHaarDetectObjects -- confirm.
   The cv::CASCADE_* enumerators declared later in this header use the same numeric values. */
00125 #define CV_HAAR_DO_CANNY_PRUNING    1
00126 #define CV_HAAR_SCALE_IMAGE         2
00127 #define CV_HAAR_FIND_BIGGEST_OBJECT 4
00128 #define CV_HAAR_DO_ROUGH_SEARCH     8
00129 
00130 //CVAPI(CvSeq*) cvHaarDetectObjectsForROC( const CvArr* image,
00131 //                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
00132 //                     CvSeq** rejectLevels, CvSeq** levelWeightds,
00133 //                     double scale_factor CV_DEFAULT(1.1),
00134 //                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
00135 //                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
00136 //                     bool outputRejectLevels = false );
00137 
00138 
/* C entry point for Haar-cascade object detection; returns the result as a CvSeq.
     image         - input array
     cascade       - Haar classifier cascade to apply
     storage       - memory storage; NOTE(review): presumably holds the returned sequence -- confirm
     scale_factor  - default 1.1
     min_neighbors - default 3
     flags         - default 0; NOTE(review): presumably a combination of the CV_HAAR_* flags -- confirm
     min_size      - default cvSize(0,0)
     max_size      - default cvSize(0,0)
   The defaults are supplied through CV_DEFAULT.
   NOTE(review): the element type of the returned sequence is not visible here
   (CvAvgComp?) -- confirm against the implementation. */
00139 CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image,
00140                      CvHaarClassifierCascade* cascade, CvMemStorage* storage,
00141                      double scale_factor CV_DEFAULT(1.1),
00142                      int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
00143                      CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)));
00144 
00145 /* Sets the images for a Haar classifier cascade.
         cascade    - cascade to update (non-const)
         sum        - NOTE(review): presumably the integral image -- confirm
         sqsum      - NOTE(review): presumably the integral image of squared values -- confirm
         tilted_sum - NOTE(review): presumably the rotated integral image -- confirm
         scale      - scale value passed along with the images */
00146 CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade,
00147                                                 const CvArr* sum, const CvArr* sqsum,
00148                                                 const CvArr* tilted_sum, double scale );
00149 
00150 /* Runs the cascade on the specified window.
         cascade     - cascade to evaluate (const)
         pt          - point identifying the window; NOTE(review): presumably its top-left corner -- confirm
         start_stage - default 0; NOTE(review): presumably the index of the first stage to evaluate -- confirm
       Returns an int; its meaning is not visible in this header. */
00151 CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
00152                                        CvPoint pt, int start_stage CV_DEFAULT(0));
00153 
00154 
00155 /****************************************************************************************\
00156 *                         Latent SVM Object Detection functions                          *
00157 \****************************************************************************************/
00158 
00159 // DataType: STRUCT position
00160 // Position of a filter in the feature pyramid.
00161 // l      - level in the feature pyramid
00162 // (x, y) - coordinates within level l
00163 typedef struct CvLSVMFilterPosition
00164 {
00165     int x;    // x coordinate within level l
00166     int y;    // y coordinate within level l
00167     int l;    // pyramid level
00168 } CvLSVMFilterPosition;
00169 
00170 // DataType: STRUCT filterObject
00171 // Description of a filter, which corresponds to a part of the object
00172 // V               - ideal (penalty = 0) position of the partial filter
00173 //                   from the root filter position (V_i in the paper)
00174 // fineFunction    - vector describing the penalty function (d_i in the paper)
00175 //                   pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
//                   (earlier revisions of this comment called the field
//                   "penaltyFunction"; NOTE(review): confirm pf[] == fineFunction[])
00176 // FILTER DESCRIPTION
00177 //   Rectangular map (sizeX x sizeY),
00178 //   every cell stores a feature vector (dimension = p)
//   NOTE(review): p is presumably the numFeatures field -- confirm
00179 // H               - matrix of feature vectors
00180 //                   to set and get feature vectors (i,j)
00181 //                   use the formula H[(j * sizeX + i) * p + k], where
00182 //                   k - component of the feature vector in cell (i, j)
00183 // END OF FILTER DESCRIPTION
00184 typedef struct CvLSVMFilterObject{
00185     CvLSVMFilterPosition V;    // ideal position relative to the root filter
00186     float fineFunction[4];     // four penalty-function coefficients
00187     int sizeX;                 // map width in cells
00188     int sizeY;                 // map height in cells
00189     int numFeatures;           // see the note on p above
00190     float *H;                  // feature-vector storage, indexed as documented above
00191 } CvLSVMFilterObject;
00192 
00193 // data type: STRUCT CvLatentSvmDetector
00194 // Internal representation of a trained Latent SVM detector
00195 // num_filters          - total number of filters (root plus part) in the model
00196 // num_components       - number of components in the model
00197 // num_part_filters     - array containing the number of part filters for each component
00198 // filters              - root and part filters for all model components
00199 // b                    - biases for all model components
00200 // score_threshold      - confidence level threshold
00201 typedef struct CvLatentSvmDetector
00202 {
00203     int num_filters;
00204     int num_components;
00205     int* num_part_filters;           // one entry per component (per the description above)
00206     CvLSVMFilterObject** filters;    // array of pointers to filter objects
00207     float* b;
00208     float score_threshold;
00209 }
00210 CvLatentSvmDetector;
00211 
00212 // data type: STRUCT CvObjectDetection
00213 // Bounding box and confidence level of one detected object
00214 // rect                 - bounding box of the detected object
00215 // score                - confidence level
00216 typedef struct CvObjectDetection
00217 {
00218     CvRect rect;     // bounding box
00219     float score;     // confidence level
00220 } CvObjectDetection;
00221 
00223 
00224 
00225 /*
00226 // load a trained detector from a file
00227 //
00228 // API
00229 // CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
00230 // INPUT
00231 // filename             - path to the file containing the parameters of the
00232 //                        trained Latent SVM detector
00233 // OUTPUT
00234 // trained Latent SVM detector in internal representation
// NOTE(review): presumably the result must be released with
// cvReleaseLatentSvmDetector -- confirm
00235 */
00236 CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);
00237 
00238 /*
00239 // release the memory allocated for a CvLatentSvmDetector structure
00240 //
00241 // API
00242 // void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
00243 // INPUT
00244 // detector             - address of the pointer to the CvLatentSvmDetector
//                        structure to be released
00245 // OUTPUT
// none (the function returns void)
00246 */
00247 CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
00248 
00249 /*
00250 // find rectangular regions in the given image that are likely
00251 // to contain objects, together with the corresponding confidence levels
00252 //
00253 // API
00254 // CvSeq* cvLatentSvmDetectObjects(IplImage* image,
00255 //                                  CvLatentSvmDetector* detector,
00256 //                                  CvMemStorage* storage,
00257 //                                  float overlap_threshold = 0.5f,
00258 //                                  int numThreads = -1);
00259 // INPUT
00260 // image                - image to detect objects in (taken as non-const IplImage*)
00261 // detector             - Latent SVM detector in internal representation
00262 // storage              - memory storage to store the resultant sequence
00263 //                          of the object candidate rectangles
00264 // overlap_threshold    - threshold for the non-maximum suppression algorithm
00265 //                          = 0.5f [here will be the reference to original paper]
// numThreads           - default -1; NOTE(review): presumably the number of worker
//                          threads, with -1 selecting an implementation default -- confirm
00266 // OUTPUT
00267 // sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
00268 */
00269 CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
00270                                 CvLatentSvmDetector* detector,
00271                                 CvMemStorage* storage,
00272                                 float overlap_threshold CV_DEFAULT(0.5f),
00273                                 int numThreads CV_DEFAULT(-1));
00274 
00275 #ifdef __cplusplus
00276 }
00277 
// C++-only variant of cvHaarDetectObjects; it is declared after the extern "C" block
// is closed and takes std::vector references.
//   rejectLevels, levelWeightds - output vectors passed by non-const reference
//   outputRejectLevels          - default false; NOTE(review): presumably enables
//                                 filling the two vectors above -- confirm
// The remaining parameters and their defaults match cvHaarDetectObjects.
00278 CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image,
00279                      CvHaarClassifierCascade* cascade, CvMemStorage* storage,
00280                      std::vector<int>& rejectLevels, std::vector<double>& levelWeightds,
00281                      double scale_factor CV_DEFAULT(1.1),
00282                      int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
00283                      CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
00284                      bool outputRejectLevels = false );
00285 
00286 namespace cv
00287 {
00288 
00290 
00291 /*
00292  * This class wraps the CvLatentSvmDetector structure and the functions working with it.
00293  * The class goals are:
00294  * 1) provide a C++ interface;
00295  * 2) make it possible to load and detect more than one class (model), unlike CvLatentSvmDetector.
00296  */
00297 class CV_EXPORTS LatentSvmDetector
00298 {
00299 public:
    // One detection: bounding box, confidence score and class identifier.
00300     struct CV_EXPORTS ObjectDetection
00301     {
00302         ObjectDetection();
        // classID defaults to -1 when it is not supplied.
00303         ObjectDetection( const Rect& rect, float score, int classID=-1 );
00304         Rect rect;
00305         float score;
00306         int classID;    // NOTE(review): presumably an index into getClassNames() -- confirm
00307     };
00308 
00309     LatentSvmDetector();
    // Takes the same arguments as load(); classNames defaults to an empty vector.
00310     LatentSvmDetector( const vector<string>& filenames, const vector<string>& classNames=vector<string>() );
00311     virtual ~LatentSvmDetector();
00312 
00313     virtual void clear();
00314     virtual bool empty() const;
    // filenames  - model files to load
    // classNames - optional class names; defaults to an empty vector
    // Returns a bool; NOTE(review): presumably true on success -- confirm.
00315     bool load( const vector<string>& filenames, const vector<string>& classNames=vector<string>() );
00316 
    // image            - input image
    // objectDetections - output vector of detections
    // overlapThreshold - default 0.5f (same default as cvLatentSvmDetectObjects)
    // numThreads       - default -1 (same default as cvLatentSvmDetectObjects)
00317     virtual void detect( const Mat& image,
00318                          vector<ObjectDetection>& objectDetections,
00319                          float overlapThreshold=0.5f,
00320                          int numThreads=-1 );
00321 
00322     const vector<string>& getClassNames() const;
00323     size_t getClassCount() const;
00324 
00325 private:
00326     vector<CvLatentSvmDetector*> detectors;    // raw pointers to the wrapped C detectors
00327     vector<string> classNames;
00328 };
00329 
// Overloads of groupRectangles. All of them take the rectangle list by non-const
// reference (in/out). They differ in the extra weight / level vectors they accept.
// eps defaults to 0.2 in every overload except the pointer-taking one, which has
// no default arguments at all.
00330 CV_EXPORTS void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, int groupThreshold, double eps=0.2);
// Same as above with an additional output vector `weights`; the only overload
// exported with CV_EXPORTS_W.
00331 CV_EXPORTS_W void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, CV_OUT vector<int>& weights, int groupThreshold, double eps=0.2);
// Pointer-taking overload. NOTE(review): whether `weights` / `levelWeights` may be
// NULL is not visible here -- confirm against the implementation.
00332 CV_EXPORTS void groupRectangles( vector<Rect>& rectList, int groupThreshold, double eps, vector<int>* weights, vector<double>* levelWeights );
00333 CV_EXPORTS void groupRectangles(vector<Rect>& rectList, vector<int>& rejectLevels,
00334                                 vector<double>& levelWeights, int groupThreshold, double eps=0.2);
// Mean-shift based grouping. detectThreshold defaults to 0.0 and winDetSize to Size(64, 128).
00335 CV_EXPORTS void groupRectangles_meanshift(vector<Rect>& rectList, vector<double>& foundWeights, vector<double>& foundScales,
00336                                           double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
00337 
00338 
// Polymorphic interface for feature evaluators. Every virtual member is declared
// non-pure, so the base class itself provides definitions elsewhere.
00339 class CV_EXPORTS FeatureEvaluator
00340 {
00341 public:
    // Feature type identifiers accepted by create() and returned by getFeatureType().
00342     enum { HAAR = 0, LBP = 1, HOG = 2 };
00343     virtual ~FeatureEvaluator();
00344 
00345     virtual bool read(const FileNode& node);
00346     virtual Ptr<FeatureEvaluator> clone() const;
00347     virtual int getFeatureType() const;
00348 
00349     virtual bool setImage(const Mat& img, Size origWinSize);
00350     virtual bool setWindow(Point p);
00351 
    // NOTE(review): presumably calcOrd evaluates an ordered (numeric) feature and
    // calcCat a categorical one, for the current window -- confirm.
00352     virtual double calcOrd(int featureIdx) const;
00353     virtual int calcCat(int featureIdx) const;
00354 
    // Factory; `type` is one of the enumerators above.
00355     static Ptr<FeatureEvaluator> create(int type);
00356 };
00357 
// Declares an explicit specialization of Ptr<CvHaarClassifierCascade>::delete_obj().
// NOTE(review): presumably so the cascade is freed through
// cvReleaseHaarClassifierCascade instead of `delete` -- confirm in the implementation.
00358 template<> CV_EXPORTS void Ptr<CvHaarClassifierCascade>::delete_obj();
00359 
// C++ detection flags. The numeric values match the CV_HAAR_* macros declared
// earlier in this header (1, 2, 4, 8), so the flags can be OR-ed together.
00360 enum
00361 {
00362     CASCADE_DO_CANNY_PRUNING=1,
00363     CASCADE_SCALE_IMAGE=2,
00364     CASCADE_FIND_BIGGEST_OBJECT=4,
00365     CASCADE_DO_ROUGH_SEARCH=8
00366 };
00367 
// Cascade classifier for object detection. The class stores a cascade description
// (`data`), a feature evaluator, an old-format Haar cascade (`oldCascade`) and an
// optional mask generator.
00368 class CV_EXPORTS_W CascadeClassifier
00369 {
00370 public:
00371     CV_WRAP CascadeClassifier();
    // Takes a file name, like load().
00372     CV_WRAP CascadeClassifier( const string& filename );
00373     virtual ~CascadeClassifier();
00374 
00375     CV_WRAP virtual bool empty() const;
00376     CV_WRAP bool load( const string& filename );
00377     virtual bool read( const FileNode& node );
    // Multi-scale detection.
    //   image        - input image
    //   objects      - output rectangles
    //   scaleFactor  - default 1.1
    //   minNeighbors - default 3
    //   flags        - default 0
    //   minSize, maxSize - default Size()
00378     CV_WRAP virtual void detectMultiScale( const Mat& image,
00379                                    CV_OUT vector<Rect>& objects,
00380                                    double scaleFactor=1.1,
00381                                    int minNeighbors=3, int flags=0,
00382                                    Size minSize=Size(),
00383                                    Size maxSize=Size() );
00384 
    // Overload that additionally takes rejectLevels / levelWeights vectors and the
    // outputRejectLevels switch (default false).
00385     CV_WRAP virtual void detectMultiScale( const Mat& image,
00386                                    CV_OUT vector<Rect>& objects,
00387                                    vector<int>& rejectLevels,
00388                                    vector<double>& levelWeights,
00389                                    double scaleFactor=1.1,
00390                                    int minNeighbors=3, int flags=0,
00391                                    Size minSize=Size(),
00392                                    Size maxSize=Size(),
00393                                    bool outputRejectLevels=false );
00394 
00395 
00396     bool isOldFormatCascade() const;
00397     virtual Size getOriginalWindowSize() const;
00398     int getFeatureType() const;
00399     bool setImage( const Mat& );
00400 
00401 protected:
00402     //virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
00403     //                                int stripSize, int yStep, double factor, vector<Rect>& candidates );
00404 
    // Single-scale detection step; `candidates`, `rejectLevels` and `levelWeights`
    // are passed by non-const reference.
00405     virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
00406                                     int stripSize, int yStep, double factor, vector<Rect>& candidates,
00407                                     vector<int>& rejectLevels, vector<double>& levelWeights, bool outputRejectLevels=false);
00408 
00409 protected:
00410     enum { BOOST = 0 };
    // Same numeric values as the namespace-level CASCADE_* enumerators.
00411     enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,
00412            FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };
00413 
    // The invoker class and the predict* function templates below are granted
    // access to the protected members.
00414     friend class CascadeClassifierInvoker;
00415 
00416     template<class FEval>
00417     friend int predictOrdered( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
00418 
00419     template<class FEval>
00420     friend int predictCategorical( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
00421 
00422     template<class FEval>
00423     friend int predictOrderedStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
00424 
00425     template<class FEval>
00426     friend int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
00427 
00428     bool setImage( Ptr<FeatureEvaluator>& feval, const Mat& image);
00429     virtual int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );
00430 
    // In-memory description of a cascade, filled by read().
00431     class Data
00432     {
00433     public:
00434         struct CV_EXPORTS DTreeNode
00435         {
00436             int featureIdx;
00437             float threshold; // for ordered features only
00438             int left;
00439             int right;
00440         };
00441 
00442         struct CV_EXPORTS DTree
00443         {
00444             int nodeCount;
00445         };
00446 
00447         struct CV_EXPORTS Stage
00448         {
00449             int first;       // NOTE(review): presumably the index of the stage's first tree in `classifiers` -- confirm
00450             int ntrees;      // NOTE(review): presumably the number of trees in the stage -- confirm
00451             float threshold;
00452         };
00453 
00454         bool read(const FileNode &node);
00455 
00456         bool isStumpBased;
00457 
00458         int stageType;
00459         int featureType;
00460         int ncategories;
00461         Size origWinSize;
00462 
00463         vector<Stage> stages;
00464         vector<DTree> classifiers;
00465         vector<DTreeNode> nodes;
00466         vector<float> leaves;
00467         vector<int> subsets;
00468     };
00469 
00470     Data data;
00471     Ptr<FeatureEvaluator> featureEvaluator;
00472     Ptr<CvHaarClassifierCascade> oldCascade;
00473 
00474 public:
    // Interface for user-supplied mask generators. generateMask() is pure virtual;
    // initializeMask() has an empty default body.
00475     class CV_EXPORTS MaskGenerator
00476     {
00477     public:
00478         virtual ~MaskGenerator() {}
00479         virtual cv::Mat generateMask(const cv::Mat& src)=0;
00480         virtual void initializeMask(const cv::Mat& /*src*/) {};
00481     };
00482     void setMaskGenerator(Ptr<MaskGenerator> maskGenerator);
00483     Ptr<MaskGenerator> getMaskGenerator();
00484 
00485     void setFaceDetectionMaskGenerator();
00486 
00487 protected:
00488     Ptr<MaskGenerator> maskGenerator;
00489 };
00490 
00491 
00493 
00494 // Detection region of interest (ROI): one scale plus the locations to evaluate
// at that scale and a slot for their confidence values.
00495 struct DetectionROI
00496 {
00497    // scale (size) of the bounding box
00498    double scale;
00499    // set of requested locations to be evaluated
00500    vector<cv::Point> locations;
00501    // vector that will contain the confidence value for each location
00502    vector<double> confidences;
00503 };
00504 
// HOG (histogram of oriented gradients) descriptor and detector. The parameters are
// public data members; computation and detection are const member functions.
00505 struct CV_EXPORTS_W HOGDescriptor
00506 {
00507 public:
00508     enum { L2Hys=0 };
00509     enum { DEFAULT_NLEVELS=64 };
00510 
    // Default parameters: 64x128 window, 16x16 blocks, 8x8 block stride, 8x8 cells,
    // 9 bins, derivAperture 1, winSigma -1, L2Hys normalization with threshold 0.2,
    // gamma correction ON, DEFAULT_NLEVELS levels.
00511     CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
00512         cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
00513         histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
00514         nlevels(HOGDescriptor::DEFAULT_NLEVELS)
00515     {}
00516 
    // Explicit-parameter constructor. NOTE(review): _gammaCorrection defaults to
    // false here, whereas the default constructor sets gammaCorrection to true.
00517     CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
00518                   Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
00519                   int _histogramNormType=HOGDescriptor::L2Hys,
00520                   double _L2HysThreshold=0.2, bool _gammaCorrection=false,
00521                   int _nlevels=HOGDescriptor::DEFAULT_NLEVELS)
00522     : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
00523     nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
00524     histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
00525     gammaCorrection(_gammaCorrection), nlevels(_nlevels)
00526     {}
00527 
    // Constructs the descriptor by calling load(filename); the result of load() is
    // discarded. NOTE(review): there is no member-initializer list, so the scalar
    // members get their values only from load() -- confirm what happens when
    // loading fails.
00528     CV_WRAP HOGDescriptor(const String& filename)
00529     {
00530         load(filename);
00531     }
00532 
    // Copy constructor implemented through d.copyTo(*this).
00533     HOGDescriptor(const HOGDescriptor& d)
00534     {
00535         d.copyTo(*this);
00536     }
00537 
00538     virtual ~HOGDescriptor() {}
00539 
00540     CV_WRAP size_t getDescriptorSize() const;
00541     CV_WRAP bool checkDetectorSize() const;
00542     CV_WRAP double getWinSigma() const;
00543 
00544     CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);
00545 
00546     virtual bool read(FileNode& fn);
00547     virtual void write(FileStorage& fs, const String& objname) const;
00548 
    // objname defaults to an empty String in both load() and save().
00549     CV_WRAP virtual bool load(const String& filename, const String& objname=String());
00550     CV_WRAP virtual void save(const String& filename, const String& objname=String()) const;
00551     virtual void copyTo(HOGDescriptor& c) const;
00552 
    // Computes descriptors for `img` into `descriptors`. winStride and padding
    // default to Size(), locations to an empty vector.
00553     CV_WRAP virtual void compute(const Mat& img,
00554                          CV_OUT vector<float>& descriptors,
00555                          Size winStride=Size(), Size padding=Size(),
00556                          const vector<Point>& locations=vector<Point>()) const;
00557     //with found weights output
00558     CV_WRAP virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
00559                         CV_OUT vector<double>& weights,
00560                         double hitThreshold=0, Size winStride=Size(),
00561                         Size padding=Size(),
00562                         const vector<Point>& searchLocations=vector<Point>()) const;
00563     //without found weights output
00564     virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
00565                         double hitThreshold=0, Size winStride=Size(),
00566                         Size padding=Size(),
00567                         const vector<Point>& searchLocations=vector<Point>()) const;
00568     //with result weights output
00569     CV_WRAP virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
00570                                   CV_OUT vector<double>& foundWeights, double hitThreshold=0,
00571                                   Size winStride=Size(), Size padding=Size(), double scale=1.05,
00572                                   double finalThreshold=2.0,bool useMeanshiftGrouping = false) const;
00573     //without found weights output
00574     virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
00575                                   double hitThreshold=0, Size winStride=Size(),
00576                                   Size padding=Size(), double scale=1.05,
00577                                   double finalThreshold=2.0, bool useMeanshiftGrouping = false) const;
00578 
    // Writes results into `grad` and `angleOfs`; both paddings default to Size().
00579     CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
00580                                  Size paddingTL=Size(), Size paddingBR=Size()) const;
00581 
    // Static accessors returning detector coefficients by value.
    // NOTE(review): presumably suitable as input to setSVMDetector() -- confirm.
00582     CV_WRAP static vector<float> getDefaultPeopleDetector();
00583     CV_WRAP static vector<float> getDaimlerPeopleDetector();
00584 
    // Public parameters; see the constructors above for their default values.
00585     CV_PROP Size winSize;
00586     CV_PROP Size blockSize;
00587     CV_PROP Size blockStride;
00588     CV_PROP Size cellSize;
00589     CV_PROP int nbins;
00590     CV_PROP int derivAperture;
00591     CV_PROP double winSigma;
00592     CV_PROP int histogramNormType;
00593     CV_PROP double L2HysThreshold;
00594     CV_PROP bool gammaCorrection;
00595     CV_PROP vector<float> svmDetector;
00596     CV_PROP int nlevels;
00597 
00598 
00599    // evaluate specified ROI and return confidence value for each location
00600    void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
00601                                    CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
00602                                    double hitThreshold = 0, cv::Size winStride = Size(),
00603                                    cv::Size padding = Size()) const;
00604 
00605    // evaluate specified ROI and return confidence value for each location in multiple scales
00606    void detectMultiScaleROI(const cv::Mat& img,
00607                                                        CV_OUT std::vector<cv::Rect>& foundLocations,
00608                                                        std::vector<DetectionROI>& locations,
00609                                                        double hitThreshold = 0,
00610                                                        int groupThreshold = 0) const;
00611 
00612    // read/parse Dalal's alt model file
   // (non-const member; the file name is taken by value)
00613    void readALTModel(std::string modelfile);
00614 };
00615 
00616 
// Data Matrix detection in `image`.
//   codes   - output vector of strings
//   corners - optional output, defaults to noArray()
//   dmtx    - optional output array of arrays, defaults to noArray()
00617 CV_EXPORTS_W void findDataMatrix(InputArray image,
00618                                  CV_OUT vector<string>& codes,
00619                                  OutputArray corners=noArray(),
00620                                  OutputArrayOfArrays dmtx=noArray());
// Drawing counterpart: `image` is in/out; `codes` and `corners` are inputs.
// NOTE(review): presumably they are the outputs of findDataMatrix -- confirm.
00621 CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
00622                                       const vector<string>& codes,
00623                                       InputArray corners);
00624 }
00625 
00626 /****************************************************************************************\
00627 *                                Datamatrix                                              *
00628 \****************************************************************************************/
00629 
// One Data Matrix result as returned by cvFindDataMatrix.
00630 struct CV_EXPORTS CvDataMatrixCode {
00631   char msg[4];        // fixed 4-byte buffer; NOTE(review): NUL termination is not guaranteed by the type -- confirm
00632   CvMat *original;    // raw pointer; ownership is not visible here
00633   CvMat *corners;     // raw pointer; ownership is not visible here
00634 };
00635 
// Returns the found codes by value in a std::deque. `im` is taken as a non-const CvMat*.
00636 CV_EXPORTS std::deque<CvDataMatrixCode> cvFindDataMatrix(CvMat *im);
00637 
00638 /****************************************************************************************\
00639 *                                 LINE-MOD                                               *
00640 \****************************************************************************************/
00641 
00642 namespace cv {
00643 namespace linemod {
00644 
00645 using cv::FileNode;
00646 using cv::FileStorage;
00647 using cv::Mat;
00648 using cv::noArray;
00649 using cv::OutputArrayOfArrays;
00650 using cv::Point;
00651 using cv::Ptr;
00652 using cv::Rect;
00653 using cv::Size;
00654 
00656 
// A feature: a location (x, y) together with a label.
00660 struct CV_EXPORTS Feature
00661 {
00662   int x;      // x coordinate
00663   int y;      // y coordinate
00664   int label;  // NOTE(review): presumably the quantized value at (x, y) -- confirm
00665 
  // Default constructor zero-initializes all three members.
00666   Feature() : x(0), y(0), label(0) {}
00667   Feature(int x, int y, int label);
00668 
  // Serialization to / from OpenCV file storage.
00669   void read(const FileNode& fn);
00670   void write(FileStorage& fs) const;
00671 };
00672 
// Member-wise initialization from the three arguments.
00673 inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}
00674 
// A template: its size, its pyramid level and the list of features it contains.
// No constructor is declared, so the int members are not initialized by default.
00675 struct CV_EXPORTS Template
00676 {
00677   int width;
00678   int height;
00679   int pyramid_level;
00680   std::vector<Feature> features;
00681 
  // Serialization to / from OpenCV file storage.
00682   void read(const FileNode& fn);
00683   void write(FileStorage& fs) const;
00684 };
00685 
// Abstract interface of a quantized image pyramid. quantize(), extractTemplate()
// and pyrDown() are pure virtual. Unlike its neighbours, this class is declared
// without CV_EXPORTS.
00689 class QuantizedPyramid
00690 {
00691 public:
00692   // Virtual destructor
00693   virtual ~QuantizedPyramid() {}
00694 
  // Writes the quantized representation into `dst`.
00701   virtual void quantize(Mat& dst) const =0;
00702 
  // Fills `templ`. Returns a bool; NOTE(review): presumably true on success -- confirm.
00708   virtual bool extractTemplate(Template& templ) const =0;
00709 
  // Non-const: NOTE(review): presumably advances to the next pyramid level -- confirm.
00715   virtual void pyrDown() =0;
00716 
00717 protected:
  // A candidate feature together with its score.
00719   struct Candidate
00720   {
00721     Candidate(int x, int y, int label, float score);
00722 
    // Ordering is inverted on purpose: a candidate compares "less" when its score
    // is higher, so an ascending sort yields descending scores.
00724     bool operator<(const Candidate& rhs) const
00725     {
00726       return score > rhs.score;
00727     }
00728 
00729     Feature f;
00730     float score;
00731   };
00732 
  // Static helper for implementations; writes into `features`.
  // NOTE(review): presumably selects up to num_features candidates that are at
  // least `distance` apart -- confirm against the implementation.
00741   static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
00742                                       std::vector<Feature>& features,
00743                                       size_t num_features, float distance);
00744 };
00745 
// Builds the embedded Feature from (x, y, label) and stores the score.
00746 inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}
00747 
// Abstract interface of a modality. A modality turns a source image into a
// QuantizedPyramid via process().
00753 class CV_EXPORTS Modality
00754 {
00755 public:
00756   // Virtual destructor
00757   virtual ~Modality() {}
00758 
  // Non-virtual entry point that forwards to processImpl(). `mask` defaults to an
  // empty Mat.
00766   Ptr<QuantizedPyramid> process(const Mat& src,
00767                     const Mat& mask = Mat()) const
00768   {
00769     return processImpl(src, mask);
00770   }
00771 
00772   virtual std::string name() const =0;
00773 
  // Serialization to / from OpenCV file storage.
00774   virtual void read(const FileNode& fn) =0;
00775   virtual void write(FileStorage& fs) const =0;
00776 
  // Factory taking the modality type as a string.
00784   static Ptr<Modality> create(const std::string& modality_type);
00785 
  // Factory taking a FileNode.
00789   static Ptr<Modality> create(const FileNode& fn);
00790 
00791 protected:
00792   // Indirection is because process() has a default parameter.
00793   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
00794                         const Mat& mask) const =0;
00795 };
00796 
// Modality implementation configured by two thresholds and a feature count, all
// exposed as public data members.
00800 class CV_EXPORTS ColorGradient : public Modality
00801 {
00802 public:
  // Default constructor; the default parameter values are set in the implementation.
00806   ColorGradient();
00807 
  // Note the argument order: weak_threshold, num_features, strong_threshold.
00816   ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
00817 
00818   virtual std::string name() const;
00819 
00820   virtual void read(const FileNode& fn);
00821   virtual void write(FileStorage& fs) const;
00822 
00823   float weak_threshold;
00824   size_t num_features;
00825   float strong_threshold;
00826 
00827 protected:
  // Overrides Modality::processImpl.
00828   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
00829                         const Mat& mask) const;
00830 };
00831 
// Modality implementation configured by three integer thresholds and a feature
// count, all exposed as public data members.
00835 class CV_EXPORTS DepthNormal : public Modality
00836 {
00837 public:
  // Default constructor; the default parameter values are set in the implementation.
00841   DepthNormal();
00842 
  // Initializes the four public parameters declared below.
00853   DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
00854               int extract_threshold);
00855 
00856   virtual std::string name() const;
00857 
00858   virtual void read(const FileNode& fn);
00859   virtual void write(FileStorage& fs) const;
00860 
00861   int distance_threshold;
00862   int difference_threshold;
00863   size_t num_features;
00864   int extract_threshold;
00865 
00866 protected:
  // Overrides Modality::processImpl.
00867   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
00868                         const Mat& mask) const;
00869 };
00870 
// Writes into `dst` a result derived from the `quantized` input image.
// NOTE(review): presumably a debug helper that renders quantized labels as
// colors for viewing -- the definition is not visible here, confirm.
// Marked CV_EXPORTS like the other free functions declared in this namespace
// (getDefaultLINE, getDefaultLINEMOD), so the symbol is also exported when the
// library is built as a shared object.
00874 CV_EXPORTS void colormap(const Mat& quantized, Mat& dst);
00875 
// One detection result: a position (x, y), a similarity score, the class it
// belongs to and a template index. Provides the ordering and equality
// operators described below.
00879 struct CV_EXPORTS Match
00880 {
        // Default constructor. Zeroes the scalar members so that a
        // default-constructed Match has a defined value; class_id
        // default-constructs to an empty string. Without this, operator< and
        // operator== on a default-constructed Match would read indeterminate
        // values.
00881   Match() : x(0), y(0), similarity(0.0f), template_id(0)
00882   {
00883   }
00884 
        // Member-wise constructor; defined inline after the struct.
00885   Match(int x, int y, float similarity, const std::string& class_id, int template_id);
00886 
        // Ordering: higher similarity sorts first (descending); ties on
        // similarity are ordered by ascending template_id.
00888   bool operator<(const Match& rhs) const
00889   {
00890     // Secondarily sort on template_id for the sake of duplicate removal
00891     if (similarity != rhs.similarity)
00892       return similarity > rhs.similarity;
00893     else
00894       return template_id < rhs.template_id;
00895   }
00896 
        // Equality: x, y, similarity and class_id must all agree. template_id is
        // not compared, so matches that differ only in template_id compare equal.
00897   bool operator==(const Match& rhs) const
00898   {
00899     return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
00900   }
00901 
00902   int x;
00903   int y;
00904   float similarity;
00905   std::string class_id;
00906   int template_id;
00907 };
00908 
// Member-wise constructor: copies each argument into the same-named member
// (x, y, similarity, class_id, template_id).
00909 inline  Match::Match(int _x, int _y, float _similarity, const std::string& _class_id, int _template_id)
00910     : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
00911   {
00912   }
00913 
// Template-matching detector. Holds a list of modalities, a per-pyramid-level
// T value, and a map from class id to that class's template pyramids. Only
// the small inline accessors are defined here; everything else is declared
// and implemented elsewhere.
00918 class CV_EXPORTS Detector
00919 {
00920 public:
        // Default constructor.
        // NOTE(review): presumably intended to be followed by read() --
        // confirm in the implementation.
00924   Detector();
00925 
        // Constructor taking the modalities to use and a vector of ints named
        // T_pyramid.
        // NOTE(review): T_pyramid presumably holds one T value per pyramid
        // level (see getT()) -- confirm in the implementation.
00933   Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
00934 
        // Runs matching on `sources` and reports results through `matches`
        // (non-const reference). `class_ids`, `quantized_images` and `masks`
        // are optional and default to an empty vector, noArray() and an empty
        // vector respectively.
        // NOTE(review): presumably `sources` and `masks` hold one Mat per
        // modality, and an empty `class_ids` means "all classes" -- confirm.
00950   void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
00951              const std::vector<std::string>& class_ids = std::vector<std::string>(),
00952              OutputArrayOfArrays quantized_images = noArray(),
00953              const std::vector<Mat>& masks = std::vector<Mat>()) const;
00954 
        // Adds a template for `class_id` built from `sources` and `object_mask`.
        // `bounding_box` is an optional output pointer (defaults to NULL).
        // NOTE(review): the int result is presumably the new template's id,
        // with some sentinel on failure -- confirm in the implementation.
00965   int addTemplate(const std::vector<Mat>& sources, const std::string& class_id,
00966           const Mat& object_mask, Rect* bounding_box = NULL);
00967 
        // Adds an already-built vector of Template objects for `class_id`.
        // NOTE(review): the int result is presumably the new template's id --
        // confirm.
00971   int addSyntheticTemplate(const std::vector<Template>& templates, const std::string& class_id);
00972 
        // Read-only access to the stored modalities (returned by reference).
00979   const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
00980 
        // Returns T_at_level[pyramid_level]. The index is not range-checked
        // here, so the caller must pass a valid level.
00984   int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
00985 
        // Returns the stored pyramid_levels value.
00989   int pyramidLevels() const { return pyramid_levels; }
00990 
        // Returns, by const reference, a std::vector<Template> (the same type
        // as the TemplatePyramid typedef below) selected by class_id and
        // template_id.
        // NOTE(review): behavior for an unknown class_id or template_id is
        // defined in the implementation -- confirm.
00997   const std::vector<Template>& getTemplates(const std::string& class_id, int template_id) const;
00998 
        // Template counts: overall, and for a single class.
00999   int numTemplates() const;
01000   int numTemplates(const std::string& class_id) const;
        // Number of entries in class_templates, i.e. distinct class ids stored.
01001   int numClasses() const { return static_cast<int>(class_templates.size()); }
01002 
        // Returns the class ids as a vector of strings (by value).
01003   std::vector<std::string> classIds() const;
01004 
        // Serialization of the detector via FileNode / FileStorage.
        // NOTE(review): whether these include the templates or only the
        // configuration is not visible here -- confirm.
01005   void read(const FileNode& fn);
01006   void write(FileStorage& fs) const;
01007 
        // Per-class serialization. readClass() returns a string and accepts
        // an optional class id override (defaults to "").
        // NOTE(review): presumably returns the id of the class that was
        // read -- confirm.
01008   std::string readClass(const FileNode& fn, const std::string &class_id_override = "");
01009   void writeClass(const std::string& class_id, FileStorage& fs) const;
01010 
        // Bulk per-class serialization using a printf-style file name pattern.
        // NOTE(review): "%s" is presumably replaced by each class id --
        // confirm.
01011   void readClasses(const std::vector<std::string>& class_ids,
01012                    const std::string& format = "templates_%s.yml.gz");
01013   void writeClasses(const std::string& format = "templates_%s.yml.gz") const;
01014 
01015 protected:
01016   std::vector< Ptr<Modality> > modalities;
01017   int pyramid_levels;
01018   std::vector<int> T_at_level;
01019 
        // A TemplatePyramid is a vector of Template; class_templates maps each
        // class id to the vector of that class's template pyramids.
01020   typedef std::vector<Template> TemplatePyramid;
01021   typedef std::map<std::string, std::vector<TemplatePyramid> > TemplatesMap;
01022   TemplatesMap class_templates;
01023 
01024   typedef std::vector<Mat> LinearMemories;
01025   // Indexed as [pyramid level][modality][quantized label]
01026   typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;
01027 
        // Matching helper for a single class: takes the linear-memory pyramid,
        // a vector of sizes, the threshold, and that class's template
        // pyramids, and reports through `matches`.
        // NOTE(review): presumably appends to `matches` rather than replacing
        // its contents -- confirm.
01028   void matchClass(const LinearMemoryPyramid& lm_pyramid,
01029                   const std::vector<Size>& sizes,
01030                   float threshold, std::vector<Match>& matches,
01031                   const std::string& class_id,
01032                   const std::vector<TemplatePyramid>& template_pyramids) const;
01033 };
01034 
// Factory returning a Ptr<Detector>. The configuration of the returned
// detector is defined in the implementation, not in this header.
// NOTE(review): the name suggests a color-gradient-only (LINE) setup --
// confirm.
01040 CV_EXPORTS Ptr<Detector> getDefaultLINE();
01041 
// Factory returning a Ptr<Detector>. The configuration of the returned
// detector is defined in the implementation, not in this header.
// NOTE(review): the name suggests a multi-modality (LINE-MOD) setup, e.g.
// color gradients plus depth normals -- confirm.
01048 CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
01049 
01050 } // namespace linemod
01051 } // namespace cv
01052 
01053 #endif
01054 
01055 #endif