43 #ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
44 #define __OPENCV_GPU_DATAMOV_UTILS_HPP__
48 namespace cv {
namespace gpu {
namespace device
50 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
58 #else // __CUDA_ARCH__ >= 200
// OPENCV_GPU_ASM_PTR is the inline-asm operand constraint applied to pointer
// operands by the ForceGlob macros below. "l" is selected when _WIN64 or
// __LP64__ is defined (i.e. a 64-bit pointer model); "r" is the alternative.
// NOTE(review): the #else / #endif lines that separate the two definitions are
// not visible in this listing - confirm against the original header.
60 #if defined(_WIN64) || defined(__LP64__)
62 #define OPENCV_GPU_ASM_PTR "l"
65 #define OPENCV_GPU_ASM_PTR "r"
// Primary ForceGlob template: only declared here, with no body. In the visible
// part of this branch the usable types are the explicit specializations
// produced by the OPENCV_GPU_DEFINE_FORCE_GLOB / _B macros.
68 template<
class T>
struct ForceGlob;
// Generates an explicit specialization ForceGlob<base_type>.
//   base_type - C++ element type being specialized
//   ptx_type  - PTX type suffix stringized onto "ld.global."
//   reg_mod   - register constraint letter stringized after "=" for the output
// The generated static Load(ptr, offset, val) issues an explicit ld.global
// from the address ptr + offset (offset counts base_type elements, since it is
// added to a base_type pointer) and binds the result directly to val.
// NOTE(review): the brace-only lines of the macro body are missing from this
// listing - confirm against the original header.
70 #define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
71 template <> struct ForceGlob<base_type> \
73 __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
75 asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
// Generates an explicit specialization ForceGlob<base_type> for element types
// narrower than a 32-bit register (the listing's cross-references show it
// applied to uchar and schar).
//   base_type - C++ element type being specialized
//   ptx_type  - PTX type suffix stringized onto "ld.global." (e.g. u8)
// The generated static Load(ptr, offset, val) issues an explicit ld.global
// from the address ptr + offset (offset counts base_type elements).
// The "r" constraint binds a full 32-bit register, so the loaded value is
// received in a local uint and then narrowed into val. Binding the register to
// *reinterpret_cast<uint*>(&val) instead would store four bytes through a
// reference to a one-byte object, overwriting its neighbours.
#define OPENCV_GPU_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            uint loaded; \
            asm("ld.global."#ptx_type" %0, [%1];" : "=r"(loaded) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
            val = static_cast<base_type>(loaded); \
        } \
    };
98 #undef OPENCV_GPU_DEFINE_FORCE_GLOB
99 #undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
100 #undef OPENCV_GPU_ASM_PTR
102 #endif // __CUDA_ARCH__ >= 200
105 #endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
GLdouble GLdouble GLdouble r
signed char schar
Definition: types_c.h:174
CvArr CvPoint offset
Definition: imgproc_c.h:77
s8 h u32
Definition: datamov_utils.hpp:93
s8 u16
Definition: datamov_utils.hpp:91
s8 h OPENCV_GPU_DEFINE_FORCE_GLOB(short, s16, h) OPENCV_GPU_DEFINE_FORCE_GLOB(uint
int d
Definition: legacy.hpp:3064
const CvMat const CvMat const CvMat CvMat CvMat CvMat CvMat CvSize CvMat CvMat * T
Definition: calib3d.hpp:270
unsigned int uint
Definition: common.hpp:104
Definition: datamov_utils.hpp:53
unsigned short ushort
Definition: common.hpp:101
int int int int int int h
const char * ptr
Definition: core_c.h:942
OPENCV_GPU_DEFINE_FORCE_GLOB_B(uchar, u8) OPENCV_GPU_DEFINE_FORCE_GLOB_B(schar
unsigned char uchar
Definition: types_c.h:170
s8 h r f32
Definition: datamov_utils.hpp:95
__device__ static __forceinline__ void Load(const T *ptr, int offset, T &val)
Definition: datamov_utils.hpp:55