00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
00044 #define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
00045
00046 #include "../datamov_utils.hpp"
00047
00048 namespace cv { namespace gpu { namespace device
00049 {
00050 namespace vec_distance_detail
00051 {
// Compile-time unrolled accumulation of a distance between two vectors.
//
// Each recursion step reads one element through vecCached and one element of
// vecGlob (via ForceGlob<T2>::Load), passes the pair to dist.reduceIter(), and
// then continues with UnrollVecDiffCached<THREAD_DIM, N - 1>. N is therefore
// the maximum number of element pairs a single call will process.
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
{
    // Bounds-checked variant.
    //
    // vecCached - pointer to the next element of the first vector; advanced by
    //             one element per step.
    // vecGlob   - base pointer of the second vector; indexed with ind.
    // len       - number of valid elements; the recursion stops once ind >= len.
    // dist      - accumulator; must provide reduceIter(T1, T2).
    // ind       - index into vecGlob for this step; grows by THREAD_DIM per step.
    template <typename Dist, typename T1, typename T2>
    static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
    {
        // Nothing left to process for this index: end the recursion early.
        if (ind >= len)
            return;

        T1 cachedVal = *vecCached;

        T2 globVal;
        ForceGlob<T2>::Load(vecGlob, ind, globVal);

        dist.reduceIter(cachedVal, globVal);

        UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached + 1, vecGlob, len, dist, ind + THREAD_DIM);
    }

    // Unchecked variant: always performs exactly N steps.
    //
    // vecCached - pointer to the next element of the first vector; advanced by
    //             one element per step.
    // vecGlob   - pointer to the element of the second vector to read in this
    //             step; advanced by THREAD_DIM elements per step.
    // dist      - accumulator; must provide reduceIter(T1, T2).
    template <typename Dist, typename T1, typename T2>
    static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
    {
        T1 cachedVal = *vecCached;

        // The pointer itself is moved between steps, so the load offset is always 0.
        T2 globVal;
        ForceGlob<T2>::Load(vecGlob, 0, globVal);

        dist.reduceIter(cachedVal, globVal);

        UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached + 1, vecGlob + THREAD_DIM, dist);
    }
};
// Recursion terminator: with N == 0 there is nothing left to unroll, so both
// entry points accept the same arguments as the primary template and do nothing.
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
{
    // End of the bounds-checked chain (no-op).
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int) {}

    // End of the unchecked chain (no-op).
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&) {}
};
00096
// Dispatcher selecting the unrolled implementation at compile time.
// LEN_EQ_MAX_LEN picks between the bounds-checked (false) and the unchecked
// (true) specialisation.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;

// LEN_EQ_MAX_LEN == false: every step is guarded by a comparison against len.
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
{
    // Runs up to MAX_LEN / THREAD_DIM checked steps, starting at index tid of
    // vecGlob and stepping by THREAD_DIM, accumulating into dist.
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
    {
        typedef UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM> Unroller;

        Unroller::calcCheck(vecCached, vecGlob, len, dist, tid);
    }
};
// LEN_EQ_MAX_LEN == true: no per-step bounds check is performed.
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
{
    // Runs exactly MAX_LEN / THREAD_DIM unchecked steps, reading vecGlob from
    // offset tid onwards in strides of THREAD_DIM and accumulating into dist.
    // len is not used by this specialisation; it is kept so both
    // specialisations share one signature.
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
    {
        typedef UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM> Unroller;

        // First element of vecGlob handled by this thread.
        const T2* threadStart = vecGlob + tid;

        Unroller::calcWithoutCheck(vecCached, threadStart, dist);
    }
};
00114 }
00115 }}}
00116
00117 #endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__