Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
00044 #define __OPENCV_GPU_DATAMOV_UTILS_HPP__
00045
00046 #include "common.hpp"
00047
00048 namespace cv { namespace gpu { namespace device
00049 {
00050 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
00051
00052
// Load helper used when __CUDA_ARCH__ is defined and >= 200: the element is
// fetched with a plain pointer dereference, no inline asm involved.
template <typename T>
struct ForceGlob
{
    // Copies the element located `offset` elements past `ptr` into `val`.
    __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)
    {
        val = *(ptr + offset);
    }
};
00057
00058 #else // __CUDA_ARCH__ >= 200
00059
// Inline-asm constraint string for pointer operands:
//   "l" (64-bit register) when _WIN64 or __LP64__ is defined,
//   "r" (32-bit register) otherwise.
#if !defined(_WIN64) && !defined(__LP64__)
#define OPENCV_GPU_ASM_PTR "r"
#else
#define OPENCV_GPU_ASM_PTR "l"
#endif

// Primary template is only declared in this branch; Load() is supplied by
// explicit specializations.
template <typename T> struct ForceGlob;
00069
// Emits the explicit specialization ForceGlob<base_type> whose Load() reads
// one element through an inline-asm "ld.global.<ptx_type>" instruction.
//   base_type - C++ element type being specialized
//   ptx_type  - type suffix pasted after "ld.global."
//   reg_mod   - constraint letter for the output operand (used as "=<reg_mod>")
#define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
    template <> \
    struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            const base_type* const src = ptr + offset; \
            asm("ld.global."#ptx_type" %0, [%1];" \
                : "="#reg_mod(val) \
                : OPENCV_GPU_ASM_PTR(src)); \
        } \
    };
00078
// Emits the explicit specialization ForceGlob<base_type> for element types
// that are narrower than the 32-bit register named by the "r" output
// constraint (the file uses it for uchar, schar and char).
//   base_type - C++ element type being specialized
//   ptx_type  - type suffix pasted after "ld.global."
//
// The asm result is received in a local 32-bit temporary and then narrowed
// into `val`. Binding the output operand directly to
// *reinterpret_cast<uint*>(&val) would store 4 bytes through a reference to a
// smaller object, overwriting whatever follows it and possibly performing a
// misaligned access.
#define OPENCV_GPU_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            unsigned int loaded; \
            asm("ld.global."#ptx_type" %0, [%1];" : "=r"(loaded) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
            val = static_cast<base_type>(loaded); \
        } \
    };
00087
// Explicit ForceGlob specializations.

// Byte-sized element types (output goes through an "r" register).
OPENCV_GPU_DEFINE_FORCE_GLOB_B(uchar, u8)
OPENCV_GPU_DEFINE_FORCE_GLOB_B(schar, s8)
OPENCV_GPU_DEFINE_FORCE_GLOB_B(char, b8)

// 16-bit integers ("h" constraint).
OPENCV_GPU_DEFINE_FORCE_GLOB(ushort, u16, h)
OPENCV_GPU_DEFINE_FORCE_GLOB(short, s16, h)

// 32-bit integers ("r" constraint).
OPENCV_GPU_DEFINE_FORCE_GLOB(uint, u32, r)
OPENCV_GPU_DEFINE_FORCE_GLOB(int, s32, r)

// Floating point ("f" / "d" constraints).
OPENCV_GPU_DEFINE_FORCE_GLOB(float, f32, f)
OPENCV_GPU_DEFINE_FORCE_GLOB(double, f64, d)

// The helper macros are implementation details of this header; drop them so
// they do not leak into including translation units.
#undef OPENCV_GPU_ASM_PTR
#undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
#undef OPENCV_GPU_DEFINE_FORCE_GLOB
00101
00102 #endif // __CUDA_ARCH__ >= 200
00103 }}}
00104
00105 #endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__