emulation.hpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #ifndef OPENCV_GPU_EMULATION_HPP_
44 #define OPENCV_GPU_EMULATION_HPP_
45 
46 #include "warp_reduce.hpp"
47 
48 namespace cv { namespace gpu { namespace device
49 {
50  struct Emulation
51  {
52 
53  static __device__ __forceinline__ int syncthreadsOr(int pred)
54  {
55 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
56  // just campilation stab
57  return 0;
58 #else
59  return __syncthreads_or(pred);
60 #endif
61  }
62 
63  template<int CTA_SIZE>
64  static __forceinline__ __device__ int Ballot(int predicate)
65  {
66 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
67  return __ballot(predicate);
68 #else
69  __shared__ volatile int cta_buffer[CTA_SIZE];
70 
71  int tid = threadIdx.x;
72  cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
73  return warp_reduce(cta_buffer);
74 #endif
75  }
76 
77  struct smem
78  {
79  enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
80 
81  template<typename T>
82  static __device__ __forceinline__ T atomicInc(T* address, T val)
83  {
84 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
85  T count;
86  unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
87  do
88  {
89  count = *address & TAG_MASK;
90  count = tag | (count + 1);
91  *address = count;
92  } while (*address != count);
93 
94  return (count & TAG_MASK) - 1;
95 #else
96  return ::atomicInc(address, val);
97 #endif
98  }
99 
100  template<typename T>
101  static __device__ __forceinline__ T atomicAdd(T* address, T val)
102  {
103 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
104  T count;
105  unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
106  do
107  {
108  count = *address & TAG_MASK;
109  count = tag | (count + val);
110  *address = count;
111  } while (*address != count);
112 
113  return (count & TAG_MASK) - val;
114 #else
115  return ::atomicAdd(address, val);
116 #endif
117  }
118 
119  template<typename T>
120  static __device__ __forceinline__ T atomicMin(T* address, T val)
121  {
122 #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
123  T count = ::min(*address, val);
124  do
125  {
126  *address = count;
127  } while (*address > count);
128 
129  return count;
130 #else
131  return ::atomicMin(address, val);
132 #endif
133  }
134  };
135  };
136 }}} // namespace cv { namespace gpu { namespace device
137 
138 #endif /* OPENCV_GPU_EMULATION_HPP_ */
static __device__ __forceinline__ int syncthreadsOr(int pred)
Definition: emulation.hpp:53
CV_EXPORTS void min(const GpuMat &src1, const GpuMat &src2, GpuMat &dst, Stream &stream=Stream::Null())
computes per-element minimum of two arrays (dst = min(src1, src2))
CvSize CvPoint2D32f int count
Definition: calib3d.hpp:221
const CvArr * U
Definition: core_c.h:733
static __device__ __forceinline__ T atomicInc(T *address, T val)
Definition: emulation.hpp:82
const CvMat const CvMat const CvMat CvMat CvMat CvMat CvMat CvSize CvMat CvMat * T
Definition: calib3d.hpp:270
Definition: emulation.hpp:79
Definition: emulation.hpp:50
GLuint GLfloat * val
GLuint GLuint GLsizei count
Definition: core_c.h:973
__device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid=threadIdx.x)
Definition: warp_reduce.hpp:49
static __device__ __forceinline__ T atomicMin(T *address, T val)
Definition: emulation.hpp:120
::max::max int
Definition: functional.hpp:324
static __device__ __forceinline__ T atomicAdd(T *address, T val)
Definition: emulation.hpp:101
GLuint address
static __forceinline__ __device__ int Ballot(int predicate)
Definition: emulation.hpp:64
Definition: emulation.hpp:77