43 #ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
44 #define __OPENCV_GPU_DEVICE_WARP_HPP__
46 namespace cv {
namespace gpu {
namespace device
58 static __device__ __forceinline__
unsigned int laneId()
61 asm(
"mov.u32 %0, %laneid;" :
"=r"(ret) );
65 template<
typename It,
typename T>
66 static __device__ __forceinline__
void fill(It beg, It
end,
const T&
value)
72 template<
typename InIt,
typename OutIt>
73 static __device__ __forceinline__ OutIt
copy(InIt beg, InIt
end, OutIt out)
80 template<
typename InIt,
typename OutIt,
class UnOp>
81 static __device__ __forceinline__ OutIt
transform(InIt beg, InIt
end, OutIt out, UnOp op)
88 template<
typename InIt1,
typename InIt2,
typename OutIt,
class BinOp>
89 static __device__ __forceinline__ OutIt
transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
91 unsigned int lane =
laneId();
93 InIt1 t1 = beg1 + lane;
94 InIt2 t2 = beg2 + lane;
100 template <
class T,
class BinOp>
101 static __device__ __forceinline__
T reduce(
volatile T *
ptr, BinOp op)
103 const unsigned int lane =
laneId();
107 T partial = ptr[lane];
109 ptr[lane] = partial = op(partial, ptr[lane + 16]);
110 ptr[lane] = partial = op(partial, ptr[lane + 8]);
111 ptr[lane] = partial = op(partial, ptr[lane + 4]);
112 ptr[lane] = partial = op(partial, ptr[lane + 2]);
113 ptr[lane] = partial = op(partial, ptr[lane + 1]);
119 template<
typename OutIt,
typename T>
122 unsigned int lane =
laneId();
static __device__ __forceinline__ unsigned int laneId()
Returns the warp lane ID of the calling thread.
Definition: warp.hpp:58
const CvMat const CvMat const CvMat CvMat CvMat CvMat CvMat CvSize CvMat CvMat * T
Definition: calib3d.hpp:270
static __device__ __forceinline__ void fill(It beg, It end, const T &value)
Definition: warp.hpp:66
static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
Definition: warp.hpp:101
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
Definition: warp.hpp:120
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
Definition: warp.hpp:81
GLsizei const GLfloat * value
Definition: core_c.h:341
const char * ptr
Definition: core_c.h:942
double double end
Definition: core_c.h:774
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
Definition: warp.hpp:89
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
Definition: warp.hpp:73