00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved. 00014 // Third party copyrights are property of their respective owners. 00015 // 00016 // Redistribution and use in source and binary forms, with or without modification, 00017 // are permitted provided that the following conditions are met: 00018 // 00019 // * Redistribution's of source code must retain the above copyright notice, 00020 // this list of conditions and the following disclaimer. 00021 // 00022 // * Redistribution's in binary form must reproduce the above copyright notice, 00023 // this list of conditions and the following disclaimer in the documentation 00024 // and/or other materials provided with the distribution. 00025 // 00026 // * The name of the copyright holders may not be used to endorse or promote products 00027 // derived from this software without specific prior written permission. 00028 // 00029 // This software is provided by the copyright holders and contributors "as is" and 00030 // any express or implied warranties, including, but not limited to, the implied 00031 // warranties of merchantability and fitness for a particular purpose are disclaimed. 00032 // In no event shall the Intel Corporation or contributors be liable for any direct, 00033 // indirect, incidental, special, exemplary, or consequential damages 00034 // (including, but not limited to, procurement of substitute goods or services; 00035 // loss of use, data, or profits; or business interruption) however caused 00036 // and on any theory of liability, whether in contract, strict liability, 00037 // or tort (including negligence or otherwise) arising in any way out of 00038 // the use of this software, even if advised of the possibility of such damage. 00039 // 00040 //M*/ 00041 00042 #ifndef _ncv_hpp_ 00043 #define _ncv_hpp_ 00044 00045 #if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__) 00046 #define NCV_EXPORTS __declspec(dllexport) 00047 #else 00048 #define NCV_EXPORTS 00049 #endif 00050 00051 #ifdef _WIN32 00052 #define WIN32_LEAN_AND_MEAN 00053 #endif 00054 00055 #include <cuda_runtime.h> 00056 00057 00058 //============================================================================== 00059 // 00060 // Compile-time assert functionality 00061 // 00062 //============================================================================== 00063 00064 00068 namespace NcvCTprep 00069 { 00070 template <bool x> 00071 struct CT_ASSERT_FAILURE; 00072 00073 template <> 00074 struct CT_ASSERT_FAILURE<true> {}; 00075 00076 template <int x> 00077 struct assertTest{}; 00078 } 00079 00080 00081 #define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro 00082 #define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro 00083 00084 00088 #define NCV_CT_ASSERT(X) \ 00089 typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \ 00090 NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__) 00091 00092 00093 00094 //============================================================================== 00095 // 00096 // Alignment macros 00097 // 00098 //============================================================================== 00099 00100 00101 #if !defined(__align__) && !defined(__CUDACC__) 00102 #if defined(_WIN32) || defined(_WIN64) 00103 #define __align__(n) __declspec(align(n)) 00104 #elif defined(__unix__) 00105 #define __align__(n) __attribute__((__aligned__(n))) 00106 #endif 00107 #endif 00108 00109 00110 //============================================================================== 00111 // 00112 // Integral and compound types of guaranteed size 00113 // 00114 //============================================================================== 00115 00116 00117 typedef bool NcvBool; 00118 typedef long long Ncv64s; 00119 00120 #if defined(__APPLE__) && !defined(__CUDACC__) 00121 typedef uint64_t Ncv64u; 00122 #else 00123 typedef unsigned long long Ncv64u; 00124 #endif 00125 00126 typedef int Ncv32s; 00127 typedef unsigned int Ncv32u; 00128 typedef short Ncv16s; 00129 typedef unsigned short Ncv16u; 00130 typedef char Ncv8s; 00131 typedef unsigned char Ncv8u; 00132 typedef float Ncv32f; 00133 typedef double Ncv64f; 00134 00135 00136 struct NcvRect8u 00137 { 00138 Ncv8u x; 00139 Ncv8u y; 00140 Ncv8u width; 00141 Ncv8u height; 00142 __host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {}; 00143 __host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {} 00144 }; 00145 00146 00147 struct NcvRect32s 00148 { 00149 Ncv32s x; 00150 Ncv32s y; 00151 Ncv32s width; 00152 Ncv32s height; 00153 __host__ __device__ NcvRect32s() : x(0), y(0), width(0), height(0) {}; 00154 __host__ __device__ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {} 00155 }; 00156 00157 00158 struct NcvRect32u 00159 { 00160 Ncv32u x; 00161 Ncv32u y; 00162 Ncv32u width; 00163 Ncv32u height; 00164 __host__ __device__ NcvRect32u() : x(0), y(0), width(0), height(0) {}; 00165 __host__ __device__ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {} 00166 }; 00167 00168 00169 struct NcvSize32s 00170 { 00171 Ncv32s width; 00172 Ncv32s height; 00173 __host__ __device__ NcvSize32s() : width(0), height(0) {}; 00174 __host__ __device__ NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {} 00175 }; 00176 00177 00178 struct NcvSize32u 00179 { 00180 Ncv32u width; 00181 Ncv32u height; 00182 __host__ __device__ NcvSize32u() : width(0), height(0) {}; 00183 __host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {} 00184 }; 00185 00186 00187 NCV_CT_ASSERT(sizeof(NcvBool) <= 4); 00188 NCV_CT_ASSERT(sizeof(Ncv64s) == 8); 00189 NCV_CT_ASSERT(sizeof(Ncv64u) == 8); 00190 NCV_CT_ASSERT(sizeof(Ncv32s) == 4); 00191 NCV_CT_ASSERT(sizeof(Ncv32u) == 4); 00192 NCV_CT_ASSERT(sizeof(Ncv16s) == 2); 00193 NCV_CT_ASSERT(sizeof(Ncv16u) == 2); 00194 NCV_CT_ASSERT(sizeof(Ncv8s) == 1); 00195 NCV_CT_ASSERT(sizeof(Ncv8u) == 1); 00196 NCV_CT_ASSERT(sizeof(Ncv32f) == 4); 00197 NCV_CT_ASSERT(sizeof(Ncv64f) == 8); 00198 NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u)); 00199 NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s)); 00200 NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u)); 00201 NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u)); 00202 00203 00204 //============================================================================== 00205 // 00206 // Persistent constants 00207 // 00208 //============================================================================== 00209 00210 00211 const Ncv32u K_WARP_SIZE = 32; 00212 const Ncv32u K_LOG2_WARP_SIZE = 5; 00213 00214 00215 //============================================================================== 00216 // 00217 // Error handling 00218 // 00219 //============================================================================== 00220 00221 00222 #define NCV_CT_PREP_STRINGIZE_AUX(x) #x 00223 #define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x) 00224 00225 00226 NCV_EXPORTS void ncvDebugOutput(const char *msg, ...); 00227 00228 00229 typedef void NCVDebugOutputHandler(const char* msg); 00230 00231 00232 NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func); 00233 00234 00235 #define ncvAssertPrintCheck(pred, msg) \ 00236 ((pred) ? true : (ncvDebugOutput("\n%s\n", \ 00237 "NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \ 00238 ), false)) 00239 00240 00241 #define ncvAssertPrintReturn(pred, msg, err) \ 00242 if (ncvAssertPrintCheck(pred, msg)) ; else return err 00243 00244 00245 #define ncvAssertReturn(pred, err) \ 00246 do \ 00247 { \ 00248 if (!(pred)) \ 00249 { \ 00250 ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \ 00251 return err; \ 00252 } \ 00253 } while (0) 00254 00255 00256 #define ncvAssertReturnNcvStat(ncvOp) \ 00257 do \ 00258 { \ 00259 NCVStatus _ncvStat = ncvOp; \ 00260 if (NCV_SUCCESS != _ncvStat) \ 00261 { \ 00262 ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \ 00263 return _ncvStat; \ 00264 } \ 00265 } while (0) 00266 00267 00268 #define ncvAssertCUDAReturn(cudacall, errCode) \ 00269 do \ 00270 { \ 00271 cudaError_t resCall = cudacall; \ 00272 cudaError_t resGLE = cudaGetLastError(); \ 00273 if (cudaSuccess != resCall || cudaSuccess != resGLE) \ 00274 { \ 00275 ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \ 00276 return errCode; \ 00277 } \ 00278 } while (0) 00279 00280 00284 enum NCVStatus 00285 { 00286 //NCV statuses 00287 NCV_SUCCESS, 00288 NCV_UNKNOWN_ERROR, 00289 00290 NCV_CUDA_ERROR, 00291 NCV_NPP_ERROR, 00292 NCV_FILE_ERROR, 00293 00294 NCV_NULL_PTR, 00295 NCV_INCONSISTENT_INPUT, 00296 NCV_TEXTURE_BIND_ERROR, 00297 NCV_DIMENSIONS_INVALID, 00298 00299 NCV_INVALID_ROI, 00300 NCV_INVALID_STEP, 00301 NCV_INVALID_SCALE, 00302 00303 NCV_ALLOCATOR_NOT_INITIALIZED, 00304 NCV_ALLOCATOR_BAD_ALLOC, 00305 NCV_ALLOCATOR_BAD_DEALLOC, 00306 NCV_ALLOCATOR_INSUFFICIENT_CAPACITY, 00307 NCV_ALLOCATOR_DEALLOC_ORDER, 00308 NCV_ALLOCATOR_BAD_REUSE, 00309 00310 NCV_MEM_COPY_ERROR, 00311 NCV_MEM_RESIDENCE_ERROR, 00312 NCV_MEM_INSUFFICIENT_CAPACITY, 00313 00314 NCV_HAAR_INVALID_PIXEL_STEP, 00315 NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER, 00316 NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE, 00317 NCV_HAAR_TOO_LARGE_FEATURES, 00318 NCV_HAAR_XML_LOADING_EXCEPTION, 00319 00320 NCV_NOIMPL_HAAR_TILTED_FEATURES, 00321 00322 NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW, 00323 00324 //NPP statuses 00325 NPPST_SUCCESS = NCV_SUCCESS, 00326 NPPST_ERROR, 00327 NPPST_CUDA_KERNEL_EXECUTION_ERROR, 00328 NPPST_NULL_POINTER_ERROR, 00329 NPPST_TEXTURE_BIND_ERROR, 00330 NPPST_MEMCPY_ERROR, 00331 NPPST_MEM_ALLOC_ERR, 00332 NPPST_MEMFREE_ERR, 00333 00334 //NPPST statuses 00335 NPPST_INVALID_ROI, 00336 NPPST_INVALID_STEP, 00337 NPPST_INVALID_SCALE, 00338 NPPST_MEM_INSUFFICIENT_BUFFER, 00339 NPPST_MEM_RESIDENCE_ERROR, 00340 NPPST_MEM_INTERNAL_ERROR, 00341 }; 00342 00343 00344 #define NCV_SET_SKIP_COND(x) \ 00345 bool __ncv_skip_cond = x 00346 00347 00348 #define NCV_RESET_SKIP_COND(x) \ 00349 __ncv_skip_cond = x 00350 00351 00352 #define NCV_SKIP_COND_BEGIN \ 00353 if (!__ncv_skip_cond) { 00354 00355 00356 #define NCV_SKIP_COND_END \ 00357 } 00358 00359 00360 //============================================================================== 00361 // 00362 // Timer 00363 // 00364 //============================================================================== 00365 00366 00367 typedef struct _NcvTimer *NcvTimer; 00368 00369 NCV_EXPORTS NcvTimer ncvStartTimer(void); 00370 00371 NCV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t); 00372 00373 NCV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t); 00374 00375 00376 //============================================================================== 00377 // 00378 // Memory management classes template compound types 00379 // 00380 //============================================================================== 00381 00382 00386 NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment); 00387 00388 00392 enum NCVMemoryType 00393 { 00394 NCVMemoryTypeNone, 00395 NCVMemoryTypeHostPageable, 00396 NCVMemoryTypeHostPinned, 00397 NCVMemoryTypeDevice 00398 }; 00399 00400 00404 struct NCV_EXPORTS NCVMemPtr 00405 { 00406 void *ptr; 00407 NCVMemoryType memtype; 00408 void clear(); 00409 }; 00410 00411 00415 struct NCV_EXPORTS NCVMemSegment 00416 { 00417 NCVMemPtr begin; 00418 size_t size; 00419 void clear(); 00420 }; 00421 00422 00426 class NCV_EXPORTS INCVMemAllocator 00427 { 00428 public: 00429 virtual ~INCVMemAllocator() = 0; 00430 00431 virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0; 00432 virtual NCVStatus dealloc(NCVMemSegment &seg) = 0; 00433 00434 virtual NcvBool isInitialized(void) const = 0; 00435 virtual NcvBool isCounting(void) const = 0; 00436 00437 virtual NCVMemoryType memType(void) const = 0; 00438 virtual Ncv32u alignment(void) const = 0; 00439 virtual size_t maxSize(void) const = 0; 00440 }; 00441 00442 inline INCVMemAllocator::~INCVMemAllocator() {} 00443 00444 00448 class NCV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator 00449 { 00450 NCVMemStackAllocator(); 00451 NCVMemStackAllocator(const NCVMemStackAllocator &); 00452 00453 public: 00454 00455 explicit NCVMemStackAllocator(Ncv32u alignment); 00456 NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL); 00457 virtual ~NCVMemStackAllocator(); 00458 00459 virtual NCVStatus alloc(NCVMemSegment &seg, size_t size); 00460 virtual NCVStatus dealloc(NCVMemSegment &seg); 00461 00462 virtual NcvBool isInitialized(void) const; 00463 virtual NcvBool isCounting(void) const; 00464 00465 virtual NCVMemoryType memType(void) const; 00466 virtual Ncv32u alignment(void) const; 00467 virtual size_t maxSize(void) const; 00468 00469 private: 00470 00471 NCVMemoryType _memType; 00472 Ncv32u _alignment; 00473 Ncv8u *allocBegin; 00474 Ncv8u *begin; 00475 Ncv8u *end; 00476 size_t currentSize; 00477 size_t _maxSize; 00478 NcvBool bReusesMemory; 00479 }; 00480 00481 00485 class NCV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator 00486 { 00487 public: 00488 00489 NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment); 00490 virtual ~NCVMemNativeAllocator(); 00491 00492 virtual NCVStatus alloc(NCVMemSegment &seg, size_t size); 00493 virtual NCVStatus dealloc(NCVMemSegment &seg); 00494 00495 virtual NcvBool isInitialized(void) const; 00496 virtual NcvBool isCounting(void) const; 00497 00498 virtual NCVMemoryType memType(void) const; 00499 virtual Ncv32u alignment(void) const; 00500 virtual size_t maxSize(void) const; 00501 00502 private: 00503 00504 NCVMemNativeAllocator(); 00505 NCVMemNativeAllocator(const NCVMemNativeAllocator &); 00506 00507 NCVMemoryType _memType; 00508 Ncv32u _alignment; 00509 size_t currentSize; 00510 size_t _maxSize; 00511 }; 00512 00513 00517 NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, 00518 const void *src, NCVMemoryType srcType, 00519 size_t sz, cudaStream_t cuStream); 00520 00521 00522 NCV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType, 00523 const void *src, Ncv32u srcPitch, NCVMemoryType srcType, 00524 Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream); 00525 00526 00530 template <class T> 00531 class NCVVector 00532 { 00533 NCVVector(const NCVVector &); 00534 00535 public: 00536 00537 NCVVector() 00538 { 00539 clear(); 00540 } 00541 00542 virtual ~NCVVector() {} 00543 00544 void clear() 00545 { 00546 _ptr = NULL; 00547 _length = 0; 00548 _memtype = NCVMemoryTypeNone; 00549 } 00550 00551 NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const 00552 { 00553 if (howMuch == 0) 00554 { 00555 ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR); 00556 howMuch = this->_length * sizeof(T); 00557 } 00558 else 00559 { 00560 ncvAssertReturn(dst._length * sizeof(T) >= howMuch && 00561 this->_length * sizeof(T) >= howMuch && 00562 howMuch > 0, NCV_MEM_COPY_ERROR); 00563 } 00564 ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) && 00565 (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR); 00566 00567 NCVStatus ncvStat = NCV_SUCCESS; 00568 if (this->_memtype != NCVMemoryTypeNone) 00569 { 00570 ncvStat = memSegCopyHelper(dst._ptr, dst._memtype, 00571 this->_ptr, this->_memtype, 00572 howMuch, cuStream); 00573 } 00574 00575 return ncvStat; 00576 } 00577 00578 T *ptr() const {return this->_ptr;} 00579 size_t length() const {return this->_length;} 00580 NCVMemoryType memType() const {return this->_memtype;} 00581 00582 protected: 00583 00584 T *_ptr; 00585 size_t _length; 00586 NCVMemoryType _memtype; 00587 }; 00588 00589 00593 template <class T> 00594 class NCVVectorAlloc : public NCVVector<T> 00595 { 00596 NCVVectorAlloc(); 00597 NCVVectorAlloc(const NCVVectorAlloc &); 00598 NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&); 00599 00600 public: 00601 00602 NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length) 00603 : 00604 allocator(allocator) 00605 { 00606 NCVStatus ncvStat; 00607 00608 this->clear(); 00609 this->allocatedMem.clear(); 00610 00611 ncvStat = allocator.alloc(this->allocatedMem, length * sizeof(T)); 00612 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", ); 00613 00614 this->_ptr = (T *)this->allocatedMem.begin.ptr; 00615 this->_length = length; 00616 this->_memtype = this->allocatedMem.begin.memtype; 00617 } 00618 00619 ~NCVVectorAlloc() 00620 { 00621 NCVStatus ncvStat; 00622 00623 ncvStat = allocator.dealloc(this->allocatedMem); 00624 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed"); 00625 00626 this->clear(); 00627 } 00628 00629 NcvBool isMemAllocated() const 00630 { 00631 return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting()); 00632 } 00633 00634 Ncv32u getAllocatorsAlignment() const 00635 { 00636 return allocator.alignment(); 00637 } 00638 00639 NCVMemSegment getSegment() const 00640 { 00641 return allocatedMem; 00642 } 00643 00644 private: 00645 INCVMemAllocator &allocator; 00646 NCVMemSegment allocatedMem; 00647 }; 00648 00649 00653 template <class T> 00654 class NCVVectorReuse : public NCVVector<T> 00655 { 00656 NCVVectorReuse(); 00657 NCVVectorReuse(const NCVVectorReuse &); 00658 00659 public: 00660 00661 explicit NCVVectorReuse(const NCVMemSegment &memSegment) 00662 { 00663 this->bReused = false; 00664 this->clear(); 00665 00666 this->_length = memSegment.size / sizeof(T); 00667 this->_ptr = (T *)memSegment.begin.ptr; 00668 this->_memtype = memSegment.begin.memtype; 00669 00670 this->bReused = true; 00671 } 00672 00673 NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length) 00674 { 00675 this->bReused = false; 00676 this->clear(); 00677 00678 ncvAssertPrintReturn(length * sizeof(T) <= memSegment.size, \ 00679 "NCVVectorReuse ctor:: memory binding failed due to size mismatch", ); 00680 00681 this->_length = length; 00682 this->_ptr = (T *)memSegment.begin.ptr; 00683 this->_memtype = memSegment.begin.memtype; 00684 00685 this->bReused = true; 00686 } 00687 00688 NcvBool isMemReused() const 00689 { 00690 return this->bReused; 00691 } 00692 00693 private: 00694 00695 NcvBool bReused; 00696 }; 00697 00698 00702 template <class T> 00703 class NCVMatrix 00704 { 00705 NCVMatrix(const NCVMatrix &); 00706 00707 public: 00708 00709 NCVMatrix() 00710 { 00711 clear(); 00712 } 00713 00714 virtual ~NCVMatrix() {} 00715 00716 void clear() 00717 { 00718 _ptr = NULL; 00719 _pitch = 0; 00720 _width = 0; 00721 _height = 0; 00722 _memtype = NCVMemoryTypeNone; 00723 } 00724 00725 Ncv32u stride() const 00726 { 00727 return _pitch / sizeof(T); 00728 } 00729 00730 //a side effect of this function is that it copies everything in a single chunk, so the "padding" will be overwritten 00731 NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const 00732 { 00733 if (howMuch == 0) 00734 { 00735 ncvAssertReturn(dst._pitch == this->_pitch && 00736 dst._height == this->_height, NCV_MEM_COPY_ERROR); 00737 howMuch = this->_pitch * this->_height; 00738 } 00739 else 00740 { 00741 ncvAssertReturn(dst._pitch * dst._height >= howMuch && 00742 this->_pitch * this->_height >= howMuch && 00743 howMuch > 0, NCV_MEM_COPY_ERROR); 00744 } 00745 ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) && 00746 (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR); 00747 00748 NCVStatus ncvStat = NCV_SUCCESS; 00749 if (this->_memtype != NCVMemoryTypeNone) 00750 { 00751 ncvStat = memSegCopyHelper(dst._ptr, dst._memtype, 00752 this->_ptr, this->_memtype, 00753 howMuch, cuStream); 00754 } 00755 00756 return ncvStat; 00757 } 00758 00759 NCVStatus copy2D(NCVMatrix<T> &dst, NcvSize32u roi, cudaStream_t cuStream) const 00760 { 00761 ncvAssertReturn(this->width() >= roi.width && this->height() >= roi.height && 00762 dst.width() >= roi.width && dst.height() >= roi.height, NCV_MEM_COPY_ERROR); 00763 ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) && 00764 (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR); 00765 00766 NCVStatus ncvStat = NCV_SUCCESS; 00767 if (this->_memtype != NCVMemoryTypeNone) 00768 { 00769 ncvStat = memSegCopyHelper2D(dst._ptr, dst._pitch, dst._memtype, 00770 this->_ptr, this->_pitch, this->_memtype, 00771 roi.width * sizeof(T), roi.height, cuStream); 00772 } 00773 00774 return ncvStat; 00775 } 00776 00777 T *ptr() const {return this->_ptr;} 00778 Ncv32u width() const {return this->_width;} 00779 Ncv32u height() const {return this->_height;} 00780 Ncv32u pitch() const {return this->_pitch;} 00781 NCVMemoryType memType() const {return this->_memtype;} 00782 00783 protected: 00784 00785 T *_ptr; 00786 Ncv32u _width; 00787 Ncv32u _height; 00788 Ncv32u _pitch; 00789 NCVMemoryType _memtype; 00790 }; 00791 00792 00796 template <class T> 00797 class NCVMatrixAlloc : public NCVMatrix<T> 00798 { 00799 NCVMatrixAlloc(); 00800 NCVMatrixAlloc(const NCVMatrixAlloc &); 00801 NCVMatrixAlloc& operator=(const NCVMatrixAlloc &); 00802 public: 00803 00804 NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0) 00805 : 00806 allocator(allocator) 00807 { 00808 NCVStatus ncvStat; 00809 00810 this->clear(); 00811 this->allocatedMem.clear(); 00812 00813 Ncv32u widthBytes = width * sizeof(T); 00814 Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment()); 00815 00816 if (pitch != 0) 00817 { 00818 ncvAssertPrintReturn(pitch >= pitchBytes && 00819 (pitch & (allocator.alignment() - 1)) == 0, 00820 "NCVMatrixAlloc ctor:: incorrect pitch passed", ); 00821 pitchBytes = pitch; 00822 } 00823 00824 Ncv32u requiredAllocSize = pitchBytes * height; 00825 00826 ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize); 00827 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", ); 00828 00829 this->_ptr = (T *)this->allocatedMem.begin.ptr; 00830 this->_width = width; 00831 this->_height = height; 00832 this->_pitch = pitchBytes; 00833 this->_memtype = this->allocatedMem.begin.memtype; 00834 } 00835 00836 ~NCVMatrixAlloc() 00837 { 00838 NCVStatus ncvStat; 00839 00840 ncvStat = allocator.dealloc(this->allocatedMem); 00841 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed"); 00842 00843 this->clear(); 00844 } 00845 00846 NcvBool isMemAllocated() const 00847 { 00848 return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting()); 00849 } 00850 00851 Ncv32u getAllocatorsAlignment() const 00852 { 00853 return allocator.alignment(); 00854 } 00855 00856 NCVMemSegment getSegment() const 00857 { 00858 return allocatedMem; 00859 } 00860 00861 private: 00862 00863 INCVMemAllocator &allocator; 00864 NCVMemSegment allocatedMem; 00865 }; 00866 00867 00871 template <class T> 00872 class NCVMatrixReuse : public NCVMatrix<T> 00873 { 00874 NCVMatrixReuse(); 00875 NCVMatrixReuse(const NCVMatrixReuse &); 00876 00877 public: 00878 00879 NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false) 00880 { 00881 this->bReused = false; 00882 this->clear(); 00883 00884 Ncv32u widthBytes = width * sizeof(T); 00885 Ncv32u pitchBytes = alignUp(widthBytes, alignment); 00886 00887 if (pitch != 0) 00888 { 00889 if (!bSkipPitchCheck) 00890 { 00891 ncvAssertPrintReturn(pitch >= pitchBytes && 00892 (pitch & (alignment - 1)) == 0, 00893 "NCVMatrixReuse ctor:: incorrect pitch passed", ); 00894 } 00895 else 00896 { 00897 ncvAssertPrintReturn(pitch >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", ); 00898 } 00899 pitchBytes = pitch; 00900 } 00901 00902 ncvAssertPrintReturn(pitchBytes * height <= memSegment.size, \ 00903 "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", ); 00904 00905 this->_width = width; 00906 this->_height = height; 00907 this->_pitch = pitchBytes; 00908 this->_ptr = (T *)memSegment.begin.ptr; 00909 this->_memtype = memSegment.begin.memtype; 00910 00911 this->bReused = true; 00912 } 00913 00914 NCVMatrixReuse(const NCVMatrix<T> &mat, NcvRect32u roi) 00915 { 00916 this->bReused = false; 00917 this->clear(); 00918 00919 ncvAssertPrintReturn(roi.x < mat.width() && roi.y < mat.height() && \ 00920 roi.x + roi.width <= mat.width() && roi.y + roi.height <= mat.height(), 00921 "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", ); 00922 00923 this->_width = roi.width; 00924 this->_height = roi.height; 00925 this->_pitch = mat.pitch(); 00926 this->_ptr = mat.ptr() + roi.y * mat.stride() + roi.x; 00927 this->_memtype = mat.memType(); 00928 00929 this->bReused = true; 00930 } 00931 00932 NcvBool isMemReused() const 00933 { 00934 return this->bReused; 00935 } 00936 00937 private: 00938 00939 NcvBool bReused; 00940 }; 00941 00942 00946 NCV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses, 00947 Ncv32u minNeighbors, Ncv32f intersectEps, NCVVector<Ncv32u> *hypothesesWeights); 00948 00949 00950 NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight, 00951 NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color); 00952 00953 00954 NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight, 00955 NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color); 00956 00957 00958 NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight, 00959 NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream); 00960 00961 00962 NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight, 00963 NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream); 00964 00965 #endif // _ncv_hpp_