00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #ifndef __OPENCV_GPU_UTILITY_HPP__
00044 #define __OPENCV_GPU_UTILITY_HPP__
00045
00046 #include "saturate_cast.hpp"
00047 #include "datamov_utils.hpp"
00048 #include "detail/reduction_detail.hpp"
00049
00050 namespace cv { namespace gpu { namespace device
00051 {
// Warp geometry: log2 of the warp size and the warp size itself (1 << 5 = 32 threads).
#define OPENCV_GPU_LOG_WARP_SIZE (5)
#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
// Shared-memory bank count for the target architecture.
// NOTE(review): this expands to an expression on __CUDA_ARCH__, which is only
// defined during device compilation passes — presumably these macros are used
// from device code only; confirm there is no host-side use.
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
00056
00058
00059
// Exchanges the contents of a and b; callable from both host and device code.
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
{
    const T old_a = a;
    a = b;
    b = old_a;
}
00066
00068
00069
// Binary mask over a single 2-D plane: coordinate (y, x) participates in an
// operation iff the corresponding mask byte is non-zero.
struct SingleMask
{
    explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb m) : mask(m) {}
    __host__ __device__ __forceinline__ SingleMask(const SingleMask& other) : mask(other.mask) {}

    // True when the mask byte at (y, x) is set.
    __device__ __forceinline__ bool operator()(int y, int x) const
    {
        return mask.ptr(y)[x] != 0;
    }

    PtrStepb mask;
};
00082
// Binary mask shared across interleaved channels: element column x of a
// multi-channel row maps to mask column x / channels, so one mask byte
// covers all channels of a pixel.
struct SingleMaskChannels
{
    __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb m, int c)
        : mask(m), channels(c) {}
    __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& other)
        : mask(other.mask), channels(other.channels) {}

    // True when the mask byte covering element (y, x) is non-zero.
    __device__ __forceinline__ bool operator()(int y, int x) const
    {
        const int maskX = x / channels;
        return mask.ptr(y)[maskX] != 0;
    }

    PtrStepb mask;
    int channels;
};
00098
// A sequence of per-image masks. next()/setMask() select the current mask;
// operator() tests it. A current mask with null data means "no mask bound"
// and accepts every coordinate.
struct MaskCollection
{
    explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* collection)
        : maskCollection(collection) {}

    __device__ __forceinline__ MaskCollection(const MaskCollection& other)
        : maskCollection(other.maskCollection), curMask(other.curMask) {}

    // Advance to the next mask in the sequence.
    __device__ __forceinline__ void next()
    {
        curMask = *maskCollection++;
    }

    // Bind the z-th mask of the collection.
    __device__ __forceinline__ void setMask(int z)
    {
        curMask = maskCollection[z];
    }

    // True when no mask is bound, or the bound mask byte at (y, x) is set.
    // The byte is fetched via ForceGlob::Load (datamov_utils.hpp) — presumably
    // to force a real global-memory load; semantics defined there.
    __device__ __forceinline__ bool operator()(int y, int x) const
    {
        if (curMask.data == 0)
            return true;

        uchar val;
        ForceGlob<uchar>::Load(curMask.ptr(y), x, val);
        return val != 0;
    }

    const PtrStepb* maskCollection;
    PtrStepb curMask;
};
00125
// No-op mask: accepts every coordinate. Drop-in replacement for the real
// mask functors when an operation runs unmasked.
struct WithOutMask
{
    __device__ __forceinline__ WithOutMask() {}
    __device__ __forceinline__ WithOutMask(const WithOutMask&) {}

    // Iteration hooks required by the mask-functor interface; nothing to do.
    __device__ __forceinline__ void next() const {}
    __device__ __forceinline__ void setMask(int) const {}

    // Every coordinate passes, in both the 2-D and 3-D forms.
    __device__ __forceinline__ bool operator()(int, int) const
    {
        return true;
    }

    __device__ __forceinline__ bool operator()(int, int, int) const
    {
        return true;
    }

    // Static variants of the same always-true checks.
    static __device__ __forceinline__ bool check(int, int)
    {
        return true;
    }

    static __device__ __forceinline__ bool check(int, int, int, uint offset = 0)
    {
        return true;
    }
};
00158
00160
00161
// Reduction of per-thread partial values across n threads, combining with the
// binary operator op and using `data` as shared scratch storage; `tid` is the
// caller's thread index. n is compile-time checked to lie in [8, 512].
// Dispatches on (n <= 64) to the implementations in reduction_detail.hpp,
// which define the actual synchronization behavior.
template <int n, typename T, typename Op> __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
    StaticAssert<n >= 8 && n <= 512>::check();
    utility_detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
}
00167
// Predicate-driven reduction that keeps a key (myData/sdata) and an associated
// value (myVal/sval) together: pred compares keys, and the matching value is
// carried along with the winning key. n is compile-time checked to lie in
// [8, 512]; dispatches on (n <= 64) to reduction_detail.hpp.
template <int n, typename T, typename V, typename Pred>
__device__ __forceinline__ void reducePredVal(volatile T* sdata, T& myData, V* sval, V& myVal, int tid, const Pred& pred)
{
    StaticAssert<n >= 8 && n <= 512>::check();
    utility_detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
}
00174
// Same as reducePredVal, but carries two associated values (myVal1/sval1 and
// myVal2/sval2, possibly of different types) along with the winning key.
// n is compile-time checked to lie in [8, 512]; dispatches on (n <= 64) to
// reduction_detail.hpp.
template <int n, typename T, typename V1, typename V2, typename Pred>
__device__ __forceinline__ void reducePredVal2(volatile T* sdata, T& myData, V1* sval1, V1& myVal1, V2* sval2, V2& myVal2, int tid, const Pred& pred)
{
    StaticAssert<n >= 8 && n <= 512>::check();
    utility_detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
}
00181
00183
00184
00185
// Solves the 2x2 linear system A*x = b by Cramer's rule.
// Returns true and fills x when A is non-singular (det != 0); returns false
// and leaves x untouched otherwise. The reciprocal determinant is computed
// in double, then each component is saturate_cast back to T.
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
{
    const T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];

    if (det == 0)
        return false; // singular system — no unique solution

    const double invdet = 1.0 / det;

    // x[i] = det(A with column i replaced by b) / det(A).
    x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
    x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));

    return true;
}
00203
00204
// Solves the 3x3 linear system A*x = b by Cramer's rule.
// Returns true and fills x when A is non-singular (det != 0); returns false
// and leaves x untouched otherwise. The reciprocal determinant is computed
// in double, then each component is saturate_cast back to T.
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
{
    // det(A) via cofactor expansion along the first row.
    T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
          - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
          + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);

    if (det != 0)
    {
        double invdet = 1.0 / det;

        // x[i] = det(A with column i replaced by b) * invdet.
        x[0] = saturate_cast<T>(invdet *
            (b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
             A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
             A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));

        x[1] = saturate_cast<T>(invdet *
            (A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
             b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
             A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));

        x[2] = saturate_cast<T>(invdet *
            (A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
             A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
             b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));

        return true;
    }

    return false;
}
00235 }}}
00236
00237 #endif // __OPENCV_GPU_UTILITY_HPP__