43 #ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__
44 #define __OPENCV_GPU_VEC_DISTANCE_HPP__
50 namespace cv {
namespace gpu {
namespace device
59 __device__ __forceinline__
void reduceIter(
int val1,
int val2)
64 template <
int THREAD_DIM> __device__ __forceinline__
void reduceAll(
int* smem,
int tid)
69 __device__ __forceinline__
operator int()
const
83 __device__ __forceinline__
void reduceIter(
float val1,
float val2)
85 mySum += ::fabs(val1 - val2);
88 template <
int THREAD_DIM> __device__ __forceinline__
void reduceAll(
float* smem,
int tid)
93 __device__ __forceinline__
operator float()
const
108 __device__ __forceinline__
void reduceIter(
float val1,
float val2)
110 float reg = val1 - val2;
114 template <
int THREAD_DIM> __device__ __forceinline__
void reduceAll(
float* smem,
int tid)
119 __device__ __forceinline__
operator float()
const
134 __device__ __forceinline__
void reduceIter(
int val1,
int val2)
136 mySum += __popc(val1 ^ val2);
139 template <
int THREAD_DIM> __device__ __forceinline__
void reduceAll(
int* smem,
int tid)
144 __device__ __forceinline__
operator int()
const
153 template <
int THREAD_DIM,
typename Dist,
typename T1,
typename T2>
156 for (
int i = tid; i <
len; i += THREAD_DIM)
164 dist.reduceIter(val1, val2);
167 dist.reduceAll<THREAD_DIM>(smem, tid);
171 template <
int THREAD_DIM,
int MAX_LEN,
bool LEN_EQ_MAX_LEN,
typename Dist,
typename T1,
typename T2>
172 __device__ __forceinline__
void calcVecDiffCached(
const T1* vecCached,
const T2* vecGlob,
int len, Dist&
dist,
typename Dist::result_type* smem,
int tid)
176 dist.reduceAll<THREAD_DIM>(smem, tid);
182 explicit __device__ __forceinline__
VecDiffGlobal(
const T1* vec1_,
int = 0,
void* = 0,
int = 0,
int = 0)
187 template <
typename T2,
typename Dist>
188 __device__ __forceinline__
void calc(
const T2*
vec2,
int len, Dist&
dist,
typename Dist::result_type* smem,
int tid)
const
202 smem[glob_tid] = vec1[glob_tid];
208 for (
int i = tid; i < MAX_LEN; i += THREAD_DIM)
209 *vec1ValsPtr++ = smem[i];
214 template <
typename T2,
typename Dist>
215 __device__ __forceinline__
void calc(
const T2*
vec2,
int len, Dist&
dist,
typename Dist::result_type* smem,
int tid)
const
217 calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(
vec1Vals,
vec2,
len,
dist, smem, tid);
224 #endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
__device__ __forceinline__ void reduceAll(float *smem, int tid)
Definition: vec_distance.hpp:114
__device__ __forceinline__ void reduceIter(int val1, int val2)
Definition: vec_distance.hpp:134
__device__ __forceinline__ VecDiffGlobal(const T1 *vec1_, int=0, void *=0, int=0, int=0)
Definition: vec_distance.hpp:182
const CvArr * U
Definition: core_c.h:733
__device__ __forceinline__ void reduceIter(float val1, float val2)
Definition: vec_distance.hpp:108
__device__ __forceinline__ L1Dist()
Definition: vec_distance.hpp:57
float mySum
Definition: vec_distance.hpp:124
__device__ __forceinline__ VecDiffCachedRegister(const T1 *vec1, int len, U *smem, int glob_tid, int tid)
Definition: vec_distance.hpp:199
__device__ __forceinline__ void reduceIter(float val1, float val2)
Definition: vec_distance.hpp:83
float mySum
Definition: vec_distance.hpp:98
__device__ __forceinline__ void reduceAll(int *smem, int tid)
Definition: vec_distance.hpp:139
Definition: functional.hpp:59
const T1 * vec1
Definition: vec_distance.hpp:193
const void int len
Definition: core_c.h:1568
__device__ __forceinline__ L2Dist()
Definition: vec_distance.hpp:106
float result_type
Definition: vec_distance.hpp:104
const CvArr * vec2
Definition: core_c.h:812
__device__ __forceinline__ void calc(const T2 *vec2, int len, Dist &dist, typename Dist::result_type *smem, int tid) const
Definition: vec_distance.hpp:188
int value_type
Definition: vec_distance.hpp:54
float value_type
Definition: vec_distance.hpp:78
__device__ __forceinline__ L1Dist()
Definition: vec_distance.hpp:81
Definition: vec_distance.hpp:101
__device__ __forceinline__ void calc(const T2 *vec2, int len, Dist &dist, typename Dist::result_type *smem, int tid) const
Definition: vec_distance.hpp:215
int mySum
Definition: vec_distance.hpp:74
int value_type
Definition: vec_distance.hpp:129
Definition: vec_distance_detail.hpp:97
__device__ __forceinline__ void reduceAll(float *smem, int tid)
Definition: vec_distance.hpp:88
Definition: vec_distance.hpp:197
__device__ void calcVecDiffGlobal(const T1 *vec1, const T2 *vec2, int len, Dist &dist, typename Dist::result_type *smem, int tid)
Definition: vec_distance.hpp:154
Definition: vec_distance.hpp:180
int result_type
Definition: vec_distance.hpp:55
::max::max::max float
Definition: functional.hpp:326
Definition: vec_distance.hpp:127
float result_type
Definition: vec_distance.hpp:79
::max::max int
Definition: functional.hpp:324
__device__ __forceinline__ void reduceIter(int val1, int val2)
Definition: vec_distance.hpp:59
float value_type
Definition: vec_distance.hpp:103
int result_type
Definition: vec_distance.hpp:130
CvPoint3D64f double * dist
Definition: legacy.hpp:556
Definition: vec_distance.hpp:52
__device__ __forceinline__ void calcVecDiffCached(const T1 *vecCached, const T2 *vecGlob, int len, Dist &dist, typename Dist::result_type *smem, int tid)
Definition: vec_distance.hpp:172
U vec1Vals[MAX_LEN/THREAD_DIM]
Definition: vec_distance.hpp:220
__device__ __forceinline__ void reduceAll(int *smem, int tid)
Definition: vec_distance.hpp:64
__device__ __forceinline__ HammingDist()
Definition: vec_distance.hpp:132
__device__ static __forceinline__ void Load(const T *ptr, int offset, T &val)
Definition: datamov_utils.hpp:55
int mySum
Definition: vec_distance.hpp:149