include/opencv2/gpu/device/emulation.hpp
Go to the documentation of this file.
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #ifndef OPENCV_GPU_EMULATION_HPP_
00044 #define OPENCV_GPU_EMULATION_HPP_
00045 
00046 #include "warp_reduce.hpp"
00047 #include <stdio.h>
00048 
00049 namespace cv { namespace gpu { namespace device
00050 {
/**
 * Thin wrappers around CUDA intrinsics that select, at compile time via
 * __CUDA_ARCH__, either the native intrinsic or a software fallback.
 * All members are static __device__ functions.
 */
struct Emulation
{
    /**
     * Wrapper for __syncthreads_or(pred).
     *
     * When __CUDA_ARCH__ is defined and below 200 this is only a
     * compilation stub: it performs NO barrier, ignores pred and always
     * returns 0. In every other case it forwards to __syncthreads_or.
     */
    static __device__ __forceinline__ int syncthreadsOr(int pred)
    {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
        // just a compilation stub
        return 0;
#else
        return __syncthreads_or(pred);
#endif
    }

    /**
     * Warp vote: returns a bit mask built from predicate.
     *
     * For __CUDA_ARCH__ >= 200 this forwards to __ballot(predicate).
     * Otherwise each thread stores its own bit (1 << (threadIdx.x & 31))
     * or 0 into a CTA_SIZE-element __shared__ buffer indexed by
     * threadIdx.x and the result of warp_reduce() over that buffer is
     * returned.
     *
     * @tparam CTA_SIZE  number of elements of the fallback shared buffer;
     *                   threadIdx.x must be a valid index into it.
     *
     * NOTE(review): __ballot is the legacy mask-less vote intrinsic; newer
     * toolkits/architectures expect __ballot_sync with an explicit mask.
     * Confirm the targeted CUDA versions before relying on this path.
     */
    template<int CTA_SIZE>
    static __forceinline__ __device__ int Ballot(int predicate)
    {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
        return __ballot(predicate);
#else
        __shared__ volatile int cta_buffer[CTA_SIZE];

        int tid = threadIdx.x;
        cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
        return warp_reduce(cta_buffer);
#endif
    }

    /**
     * Atomic-style operations with a software fallback for
     * __CUDA_ARCH__ < 120; otherwise the global ::atomicXxx intrinsics
     * are used directly.
     *
     * The fallback for atomicInc/atomicAdd stamps the top 5 bits of the
     * stored word with a tag derived from threadIdx.x, writes the word,
     * reads it back, and retries until the thread observes its own tagged
     * value. Consequently, on the fallback path the memory word keeps a
     * tag in its top 5 bits; only the bits selected by TAG_MASK carry the
     * counter.
     *
     * NOTE(review): the write/read-back scheme presumably relies on the
     * contending threads belonging to one warp and on `address` pointing
     * to shared memory (per the struct name) -- confirm against callers.
     */
    struct smem
    {
        // Mask selecting the low (32 - 5) counter bits of a tagged word.
        enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };

        /**
         * Increments *address and returns the previous counter value.
         *
         * Native path: ::atomicInc(address, val).
         * Fallback path: increments the TAG_MASK portion by one; `val`
         * (the wrap-around bound of ::atomicInc) is NOT honoured there.
         */
        template<typename T>
        static __device__ __forceinline__ T atomicInc(T* address, T val)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
            // All accesses go through a volatile pointer: the loop depends on
            // really re-reading memory after the store, which the compiler
            // would otherwise be free to fold into the value just written.
            volatile T* slot = address;
            T count;
            unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
            do
            {
                count = *slot & TAG_MASK;
                count = tag | (count + 1);
                *slot = count;
            } while (*slot != count); // retry until our tagged value is the one stored

            return (count & TAG_MASK) - 1;
#else
            return ::atomicInc(address, val);
#endif
        }

        /**
         * Adds val to *address and returns the previous counter value.
         *
         * Native path: ::atomicAdd(address, val).
         * Fallback path: same tagged write/read-back loop as atomicInc,
         * adding val to the TAG_MASK portion of the word.
         */
        template<typename T>
        static __device__ __forceinline__ T atomicAdd(T* address, T val)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
            // Volatile access keeps the store and the verifying load in the loop.
            volatile T* slot = address;
            T count;
            unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
            do
            {
                count = *slot & TAG_MASK;
                count = tag | (count + val);
                *slot = count;
            } while (*slot != count); // retry until our tagged value is the one stored

            return (count & TAG_MASK) - val;
#else
            return ::atomicAdd(address, val);
#endif
        }

        /**
         * Stores min(*address, val) into *address.
         *
         * Native path: ::atomicMin(address, val).
         * Fallback path: computes the minimum once, then keeps writing it
         * until the stored value is not greater than it. Note that the
         * fallback returns the computed minimum (the NEW value), not the
         * value previously held at address.
         */
        template<typename T>
        static __device__ __forceinline__ T atomicMin(T* address, T val)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
            // Volatile access keeps the store and the verifying load in the loop.
            volatile T* slot = address;
            T count = ::min(*slot, val);
            do
            {
                *slot = count;
            } while (*slot > count); // rewrite while a larger value is still stored

            return count;
#else
            return ::atomicMin(address, val);
#endif
        }
    };
};
00137 }}} // namespace cv { namespace gpu { namespace device
00138 
00139 #endif /* OPENCV_GPU_EMULATION_HPP_ */