include/opencv2/gpu/device/vec_distance.hpp
Go to the documentation of this file.
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__
00044 #define __OPENCV_GPU_VEC_DISTANCE_HPP__
00045 
00046 #include "utility.hpp"
00047 #include "functional.hpp"
00048 #include "detail/vec_distance_detail.hpp"
00049 
00050 namespace cv { namespace gpu { namespace device
00051 {
00052     template <typename T> struct L1Dist
00053     {
00054         typedef int value_type;
00055         typedef int result_type;
00056 
00057         __device__ __forceinline__ L1Dist() : mySum(0) {}
00058 
00059         __device__ __forceinline__ void reduceIter(int val1, int val2)
00060         {
00061             mySum = __sad(val1, val2, mySum);
00062         }
00063 
00064         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
00065         {
00066             reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
00067         }
00068 
00069         __device__ __forceinline__ operator int() const
00070         {
00071             return mySum;
00072         }
00073 
00074         int mySum;
00075     };
00076     template <> struct L1Dist<float>
00077     {
00078         typedef float value_type;
00079         typedef float result_type;
00080 
00081         __device__ __forceinline__ L1Dist() : mySum(0.0f) {}
00082 
00083         __device__ __forceinline__ void reduceIter(float val1, float val2)
00084         {
00085             mySum += ::fabs(val1 - val2);
00086         }
00087 
00088         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
00089         {
00090             reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
00091         }
00092 
00093         __device__ __forceinline__ operator float() const
00094         {
00095             return mySum;
00096         }
00097 
00098         float mySum;
00099     };
00100 
00101     struct L2Dist
00102     {
00103         typedef float value_type;
00104         typedef float result_type;
00105 
00106         __device__ __forceinline__ L2Dist() : mySum(0.0f) {}
00107 
00108         __device__ __forceinline__ void reduceIter(float val1, float val2)
00109         {
00110             float reg = val1 - val2;
00111             mySum += reg * reg;
00112         }
00113 
00114         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
00115         {
00116             reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
00117         }
00118 
00119         __device__ __forceinline__ operator float() const
00120         {
00121             return sqrtf(mySum);
00122         }
00123 
00124         float mySum;
00125     };
00126 
00127     struct HammingDist
00128     {
00129         typedef int value_type;
00130         typedef int result_type;
00131 
00132         __device__ __forceinline__ HammingDist() : mySum(0) {}
00133 
00134         __device__ __forceinline__ void reduceIter(int val1, int val2)
00135         {
00136             mySum += __popc(val1 ^ val2);
00137         }
00138 
00139         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
00140         {
00141             reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
00142         }
00143 
00144         __device__ __forceinline__ operator int() const
00145         {
00146             return mySum;
00147         }
00148 
00149         int mySum;
00150     };
00151 
00152     // calc distance between two vectors in global memory
00153     template <int THREAD_DIM, typename Dist, typename T1, typename T2>
00154     __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
00155     {
00156         for (int i = tid; i < len; i += THREAD_DIM)
00157         {
00158             T1 val1;
00159             ForceGlob<T1>::Load(vec1, i, val1);
00160 
00161             T2 val2;
00162             ForceGlob<T2>::Load(vec2, i, val2);
00163 
00164             dist.reduceIter(val1, val2);
00165         }
00166 
00167         dist.reduceAll<THREAD_DIM>(smem, tid);
00168     }
00169 
00170     // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
00171     template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
00172     __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
00173     {
00174         vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
00175 
00176         dist.reduceAll<THREAD_DIM>(smem, tid);
00177     }
00178 
00179     // calc distance between two vectors in global memory
00180     template <int THREAD_DIM, typename T1> struct VecDiffGlobal
00181     {
00182         explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
00183         {
00184             vec1 = vec1_;
00185         }
00186 
00187         template <typename T2, typename Dist>
00188         __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
00189         {
00190             calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
00191         }
00192 
00193         const T1* vec1;
00194     };
00195 
00196     // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
00197     template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
00198     {
00199         template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
00200         {
00201             if (glob_tid < len)
00202                 smem[glob_tid] = vec1[glob_tid];
00203             __syncthreads();
00204 
00205             U* vec1ValsPtr = vec1Vals;
00206 
00207             #pragma unroll
00208             for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
00209                 *vec1ValsPtr++ = smem[i];
00210 
00211             __syncthreads();
00212         }
00213 
00214         template <typename T2, typename Dist>
00215         __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
00216         {
00217             calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
00218         }
00219 
00220         U vec1Vals[MAX_LEN / THREAD_DIM];
00221     };
00222 }}} // namespace cv { namespace gpu { namespace device
00223 
00224 #endif // __OPENCV_GPU_VEC_DISTANCE_HPP__