43 #ifndef OPENCV_GPU_EMULATION_HPP_
44 #define OPENCV_GPU_EMULATION_HPP_
48 namespace cv {
namespace gpu {
namespace device
55 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
59 return __syncthreads_or(pred);
63 template<
int CTA_SIZE>
64 static __forceinline__ __device__
int Ballot(
int predicate)
66 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
67 return __ballot(predicate);
69 __shared__
volatile int cta_buffer[CTA_SIZE];
71 int tid = threadIdx.x;
72 cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
84 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
86 unsigned int tag = threadIdx.x << ( (
sizeof(
unsigned int) << 3) - 5
U);
90 count = tag | (count + 1);
92 }
while (*address != count);
96 return ::atomicInc(address, val);
103 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
105 unsigned int tag = threadIdx.x << ( (
sizeof(
unsigned int) << 3) - 5
U);
109 count = tag | (count + val);
111 }
while (*address != count);
115 return ::atomicAdd(address, val);
122 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
127 }
while (*address > count);
131 return ::atomicMin(address, val);
static __device__ __forceinline__ int syncthreadsOr(int pred)
Definition: emulation.hpp:53
CV_EXPORTS void min(const GpuMat &src1, const GpuMat &src2, GpuMat &dst, Stream &stream=Stream::Null())
computes per-element minimum of two arrays (dst = min(src1, src2))
CvSize CvPoint2D32f int count
Definition: calib3d.hpp:221
const CvArr * U
Definition: core_c.h:733
static __device__ __forceinline__ T atomicInc(T *address, T val)
Definition: emulation.hpp:82
const CvMat const CvMat const CvMat CvMat CvMat CvMat CvMat CvSize CvMat CvMat * T
Definition: calib3d.hpp:270
Definition: emulation.hpp:79
Definition: emulation.hpp:50
GLuint GLuint GLsizei count
Definition: core_c.h:973
__device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid=threadIdx.x)
Definition: warp_reduce.hpp:49
static __device__ __forceinline__ T atomicMin(T *address, T val)
Definition: emulation.hpp:120
::max::max int
Definition: functional.hpp:324
static __device__ __forceinline__ T atomicAdd(T *address, T val)
Definition: emulation.hpp:101
static __forceinline__ __device__ int Ballot(int predicate)
Definition: emulation.hpp:64
Definition: emulation.hpp:77