/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/objdetect/objdetect_c.h"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }

#else

//
// CascadeClassifierBase
//
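// Stores the detection parameters shared by the Haar (NCV) and LBP
// implementations below; the defaults are set in the constructor.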

namespace
{
    class CascadeClassifierBase : public cuda::CascadeClassifier
    {
    public:
        CascadeClassifierBase();

        virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
        virtual Size getMaxObjectSize() const { return maxObjectSize_; }

        virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
        virtual Size getMinObjectSize() const { return minObjectSize_; }

        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
        virtual double getScaleFactor() const { return scaleFactor_; }

        virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
        virtual int getMinNeighbors() const { return minNeighbors_; }

        virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
        virtual bool getFindLargestObject() { return findLargestObject_; }

        virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
        virtual int getMaxNumObjects() const { return maxNumObjects_; }

    protected:
        Size maxObjectSize_;
        Size minObjectSize_;
        double scaleFactor_;
        int minNeighbors_;
        bool findLargestObject_;
        int maxNumObjects_;
    };

    CascadeClassifierBase::CascadeClassifierBase() :
        maxObjectSize_(),
        minObjectSize_(),
        scaleFactor_(1.2),
        minNeighbors_(4),
        findLargestObject_(false),
        maxNumObjects_(100)
    {
    }
}
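
// A minimal usage sketch of the parameter interface (illustrative only; a real
// instance comes from one of the cuda::CascadeClassifier::create overloads at
// the bottom of this file):
//
//   cascade->setScaleFactor(1.1);            // finer scale pyramid
//   cascade->setMinNeighbors(3);             // looser candidate grouping
//   cascade->setMinObjectSize(Size(24, 24)); // discard smaller detections
//   cascade->setMaxNumObjects(200);          // capacity of the results buffer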

//
// HaarCascade
//

#ifdef HAVE_OPENCV_CUDALEGACY

namespace
{
    class HaarCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit HaarCascade_Impl(const String& filename);

        virtual Size getClassifierSize() const;

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        NCVStatus load(const String& classifierFile);
        NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
        NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);

        Size lastAllocatedFrameSize;

        Ptr<NCVMemStackAllocator> gpuAllocator;
        Ptr<NCVMemStackAllocator> cpuAllocator;

        cudaDeviceProp devProp;
        NCVStatus ncvStat;

        Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
        Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;

        Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;

        HaarClassifierCascadeDescriptor haar;

        Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
    };

    static void NCVDebugOutputHandler(const String &msg)
    {
        CV_Error(Error::GpuApiCallError, msg.c_str());
    }

    HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
        lastAllocatedFrameSize(-1, -1)
    {
        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
        ncvSafeCall( load(filename) );
    }

    Size HaarCascade_Impl::getClassifierSize() const
    {
        return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
    }

    void HaarCascade_Impl::detectMultiScale(InputArray _image,
                                            OutputArray _objects,
                                            Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        // The scan window must cover at least the classifier window and at
        // least the requested minimum object size, in each dimension.
        Size ncvMinSize = getClassifierSize();
        ncvMinSize.width = std::max(ncvMinSize.width, minObjectSize_.width);
        ncvMinSize.height = std::max(ncvMinSize.height, minObjectSize_.height);

        BufferPool pool(stream);
        GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        unsigned int numDetections;
        ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );

        if (numDetections > 0)
        {
            objectsBuf.colRange(0, numDetections).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }

    NCVStatus HaarCascade_Impl::load(const String& classifierFile)
    {
        int devId = cv::cuda::getDevice();
        ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);

        // Load the classifier from file (typically around 1 MB) using simple native allocators
        gpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
        cpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);

        Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
        ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);

        h_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages));
        h_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes));
        h_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);

        ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);

        d_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages));
        d_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes));
        d_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);

        ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }

    NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
    {
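        // Allocations are cached per frame size; nothing to do if it is unchanged.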
        if (lastAllocatedFrameSize == frameSize)
        {
            return NCV_SUCCESS;
        }

        // First pass: stack allocators constructed with only an alignment run in
        // counting mode, so the detection call below measures peak memory usage
        // without allocating any device memory.
        NCVMemStackAllocator gpuCounter(static_cast<int>(devProp.textureAlignment));
        NCVMemStackAllocator cpuCounter(static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);

        NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
        NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);

        ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
        ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
        ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();
        Ncv32u numDetections;
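        // The constants here (100 candidate rects above, 4 neighbours, 1.2 scale
        // step) only need to be representative: this call is made purely for its
        // memory accounting, and its detections are discarded.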
        ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
                                                    *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);

        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        // Second pass: create the real allocators, sized to the measured peak usage.
        gpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
        cpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);

        lastAllocatedFrameSize = frameSize;
        return NCV_SUCCESS;
    }

    NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
    {
        ncvStat = calculateMemReqsAndAllocate(src.size());
        ncvAssertReturnNcvStat(ncvStat);

        NCVMemPtr src_beg;
        src_beg.ptr = (void*)src.ptr<Ncv8u>();
        src_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment src_seg;
        src_seg.begin = src_beg;
        src_seg.size = src.step * src.rows;

        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        CV_Assert(objects.rows == 1);

        NCVMemPtr objects_beg;
        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
        objects_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment objects_seg;
        objects_seg.begin = objects_beg;
        objects_seg.size = objects.step * objects.rows;
        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();

        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);

        Ncv32u flags = 0;
        flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;

        ncvStat = ncvDetectObjectsMultiScale_device(
            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures,
            winMinSize,
            minNeighbors_,
            scaleFactor_, 1,
            flags,
            *gpuAllocator, *cpuAllocator, devProp, 0);
        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }
}

#endif

//
// LbpCascade
//

namespace cv { namespace cuda { namespace device
{
    namespace lbp
    {
        void classifyPyramid(int frameW,
                             int frameH,
                             int windowW,
                             int windowH,
                             float initialScale,
                             float factor,
                             int total,
                             const PtrStepSzb& mstages,
                             const int nstages,
                             const PtrStepSzi& mnodes,
                             const PtrStepSzf& mleaves,
                             const PtrStepSzi& msubsets,
                             const PtrStepSzb& mfeatures,
                             const int subsetSize,
                             PtrStepSz<int4> objects,
                             unsigned int* classified,
                             PtrStepSzi integral);

        // Note: the misspelled name is kept as-is; it must match the symbol
        // defined by the CUDA kernel implementation.
        void connectedConmonents(PtrStepSz<int4> candidates,
                                 int ncandidates,
                                 PtrStepSz<int4> objects,
                                 int groupThreshold,
                                 float grouping_eps,
                                 unsigned int* nclasses);
    }
}}}

namespace
{
    cv::Size operator -(const cv::Size& a, const cv::Size& b)
    {
        return cv::Size(a.width - b.width, a.height - b.height);
    }

    cv::Size operator +(const cv::Size& a, const int& i)
    {
        return cv::Size(a.width + i, a.height + i);
    }

    cv::Size operator *(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
    }

    cv::Size operator /(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
    }

    bool operator <=(const cv::Size& a, const cv::Size& b)
    {
        // true only if a fits inside b in both dimensions
        return a.width <= b.width && a.height <= b.height;
    }

    struct PyrLevel
    {
        PyrLevel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            // skip levels whose scaled detection window is still no larger
            // than the requested minimum object size
            do
            {
                order = _order;
                scale = static_cast<float>(pow(_scale, order));
                sFrame = frame / scale;
                workArea = sFrame - window + 1;
                sWindow = window * scale;
                _order++;
            } while (sWindow <= minObjectSize);
        }

        bool isFeasible(cv::Size maxObj)
        {
            return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
        }

        PyrLevel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            return PyrLevel(order + 1, factor, frame, window, minObjectSize);
        }

        int order;
        float scale;
        cv::Size sFrame;
        cv::Size workArea;
        cv::Size sWindow;
    };
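
    // Worked example (illustrative): with scaleFactor 1.2, a 640x480 frame, a
    // 24x24 classifier window and minObjectSize 48x48, levels 0..3 are skipped
    // because the scaled window (24, 29, 35, 41 px) cannot yet cover a 48 px
    // object; the first usable level is order 4, where scale = 1.2^4 ~ 2.07,
    // sWindow = 50x50, sFrame = 309x231 and workArea = 286x208.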

    class LbpCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit LbpCascade_Impl(const FileStorage& file);

        virtual Size getClassifierSize() const { return NxM; }

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        bool load(const FileNode &root);
        void allocateBuffers(cv::Size frame);

    private:
        struct Stage
        {
            int first;
            int ntrees;
            float threshold;
        };

        enum stage { BOOST = 0 };
        enum feature { LBP = 1, HAAR = 2 };

        static const stage stageType = BOOST;
        static const feature featureType = LBP;

        cv::Size NxM;
        bool isStumps;
        int ncategories;
        int subsetSize;
        int nodeStep;

        // gpu representation of classifier
        GpuMat stage_mat;
        GpuMat trees_mat;
        GpuMat nodes_mat;
        GpuMat leaves_mat;
        GpuMat subsets_mat;
        GpuMat features_mat;

        GpuMat integral;
        GpuMat integralBuffer;
        GpuMat resizeBuffer;

        GpuMat candidates;
        static const int integralFactor = 4; // how many integral images are packed side by side per batch
    };

    LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
    {
        load(file.getFirstTopLevelNode());
    }

    void LbpCascade_Impl::detectMultiScale(InputArray _image,
                                           OutputArray _objects,
                                           Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        const float grouping_eps = 0.2f;

        BufferPool pool(stream);
        GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        // used for debugging
        // candidates.setTo(cv::Scalar::all(0));
        // objects.setTo(cv::Scalar::all(0));

        if (maxObjectSize_ == cv::Size())
            maxObjectSize_ = image.size();

        allocateBuffers(image.size());

        unsigned int classified = 0;
        GpuMat dclassified(1, 1, CV_32S);
        cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

        PyrLevel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);
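
        // Walk the scale pyramid in batches: the inner loop below packs the
        // integral images of consecutive levels side by side into the shared
        // `integral` buffer (sized for integralFactor frames), and each batch
        // is then classified with a single classifyPyramid launch.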

        while (level.isFeasible(maxObjectSize_))
        {
            int acc = level.sFrame.width + 1;
            float iniScale = level.scale;

            int step = 1 + (level.scale <= 2.f);

            int total = 0, prev = 0;

            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
            {
                // create suitable matrix headers
                GpuMat src = resizeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
                GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));

                // generate the integral image for this scale
                cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
                cuda::integral(src, sint);

                // accumulate the number of scan windows in this batch
                int totalWidth = level.workArea.width / step;
                total += totalWidth * (level.workArea.height / step);

                // go to the next pyramid level
                level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);

                step = (1 + (level.scale <= 2.f));
                prev = acc;
                acc += level.sFrame.width + 1;
            }

            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total,
                                         stage_mat, static_cast<int>(stage_mat.cols / sizeof(Stage)), nodes_mat,
                                         leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
        }

        if (minNeighbors_ <= 0 || objects.empty())
            return;

        // read back the raw candidate count, group overlapping candidates on the
        // device, then read back the number of grouped detections
        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        cudaSafeCall( cudaDeviceSynchronize() );

        if (classified > 0)
        {
            objects.colRange(0, classified).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }

    bool LbpCascade_Impl::load(const FileNode &root)
    {
        const char *CUDA_CC_STAGE_TYPE = "stageType";
        const char *CUDA_CC_FEATURE_TYPE = "featureType";
        const char *CUDA_CC_BOOST = "BOOST";
        const char *CUDA_CC_LBP = "LBP";
        const char *CUDA_CC_MAX_CAT_COUNT = "maxCatCount";
        const char *CUDA_CC_HEIGHT = "height";
        const char *CUDA_CC_WIDTH = "width";
        const char *CUDA_CC_STAGE_PARAMS = "stageParams";
        const char *CUDA_CC_MAX_DEPTH = "maxDepth";
        const char *CUDA_CC_FEATURE_PARAMS = "featureParams";
        const char *CUDA_CC_STAGES = "stages";
        const char *CUDA_CC_STAGE_THRESHOLD = "stageThreshold";
        const float CUDA_THRESHOLD_EPS = 1e-5f;
        const char *CUDA_CC_WEAK_CLASSIFIERS = "weakClassifiers";
        const char *CUDA_CC_INTERNAL_NODES = "internalNodes";
        const char *CUDA_CC_LEAF_VALUES = "leafValues";
        const char *CUDA_CC_FEATURES = "features";
        const char *CUDA_CC_RECT = "rect";

        String stageTypeStr = (String)root[CUDA_CC_STAGE_TYPE];
        CV_Assert(stageTypeStr == CUDA_CC_BOOST);

        String featureTypeStr = (String)root[CUDA_CC_FEATURE_TYPE];
        CV_Assert(featureTypeStr == CUDA_CC_LBP);

        NxM.width = (int)root[CUDA_CC_WIDTH];
        NxM.height = (int)root[CUDA_CC_HEIGHT];
        CV_Assert( NxM.height > 0 && NxM.width > 0 );

        isStumps = ((int)(root[CUDA_CC_STAGE_PARAMS][CUDA_CC_MAX_DEPTH]) == 1);
        CV_Assert(isStumps);

        FileNode fn = root[CUDA_CC_FEATURE_PARAMS];
        if (fn.empty())
            return false;

        ncategories = fn[CUDA_CC_MAX_CAT_COUNT];

        subsetSize = (ncategories + 31) / 32;
        nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );
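        // (Each internal node occupies nodeStep ints: two child indices and a
        // feature index, then either a subsetSize-word category mask for
        // categorical features such as LBP, or a single threshold slot.)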

        fn = root[CUDA_CC_STAGES];
        if (fn.empty())
            return false;

        std::vector<Stage> stages;
        stages.reserve(fn.size());

        std::vector<int> cl_trees;
        std::vector<int> cl_nodes;
        std::vector<float> cl_leaves;
        std::vector<int> subsets;

        FileNodeIterator it = fn.begin(), it_end = fn.end();
        for (size_t si = 0; it != it_end; si++, ++it )
        {
            FileNode fns = *it;
            Stage st;
            st.threshold = (float)fns[CUDA_CC_STAGE_THRESHOLD] - CUDA_THRESHOLD_EPS;

            fns = fns[CUDA_CC_WEAK_CLASSIFIERS];
            if (fns.empty())
                return false;

            st.ntrees = (int)fns.size();
            st.first = (int)cl_trees.size();

            stages.push_back(st); // (first, ntrees, threshold)

            cl_trees.reserve(stages[si].first + stages[si].ntrees);

            // weak trees
            FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
            for ( ; it1 != it1_end; ++it1 )
            {
                FileNode fnw = *it1;

                FileNode internalNodes = fnw[CUDA_CC_INTERNAL_NODES];
                FileNode leafValues = fnw[CUDA_CC_LEAF_VALUES];
                if ( internalNodes.empty() || leafValues.empty() )
                    return false;

                int nodeCount = (int)internalNodes.size()/nodeStep;
                cl_trees.push_back(nodeCount);

                cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
                cl_leaves.reserve(cl_leaves.size() + leafValues.size());

                if( subsetSize > 0 )
                    subsets.reserve(subsets.size() + nodeCount * subsetSize);

                // nodes
                FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();

                for( ; iIt != iEnd; )
                {
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));

                    if( subsetSize > 0 )
                        for( int j = 0; j < subsetSize; j++, ++iIt )
                            subsets.push_back((int)*iIt);
                }

                // leaves
                iIt = leafValues.begin(), iEnd = leafValues.end();
                for( ; iIt != iEnd; ++iIt )
                    cl_leaves.push_back((float)*iIt);
            }
        }

        fn = root[CUDA_CC_FEATURES];
        if( fn.empty() )
            return false;
        std::vector<uchar> features;
        features.reserve(fn.size() * 4);
        FileNodeIterator f_it = fn.begin(), f_end = fn.end();
        for (; f_it != f_end; ++f_it)
        {
            FileNode rect = (*f_it)[CUDA_CC_RECT];
            FileNodeIterator r_it = rect.begin();
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
        }

        // upload the packed classifier data to the GPU
        stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) ));
        trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
        nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
        leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
        subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
        features_mat.upload(cv::Mat(features).reshape(4,1));

        return true;
    }

    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
    {
        if (frame == cv::Size())
            return;

        if (resizeBuffer.empty() || frame.width > resizeBuffer.cols || frame.height > resizeBuffer.rows)
        {
            resizeBuffer.create(frame, CV_8UC1);

            // room for up to integralFactor integral images packed side by side
            // (see the batching loop in detectMultiScale)
            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);

#ifdef HAVE_OPENCV_CUDALEGACY
            NcvSize32u roiSize;
            roiSize.width = frame.width;
            roiSize.height = frame.height;

            cudaDeviceProp prop;
            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );

            Ncv32u bufSize;
            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
            integralBuffer.create(1, bufSize, CV_8UC1);
#endif

            candidates.create(1, frame.width >> 1, CV_32SC4);
        }
    }

}

//
// create
//
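// File-name based dispatch: ".nvbin" files and files that FileStorage cannot
// parse are treated as NCV Haar cascades; parsed XML/YAML cascades go to the
// LBP implementation when their featureType is "LBP", and to the Haar
// implementation otherwise.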

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
{
    String fext = filename.substr(filename.find_last_of(".") + 1);
    fext = fext.toLowerCase();

    if (fext == "nvbin")
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without Haar cascade support (opencv_cudalegacy is missing)");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    FileStorage fs(filename, FileStorage::READ);

    if (!fs.isOpened())
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without Haar cascade support (opencv_cudalegacy is missing)");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    const char *CUDA_CC_LBP = "LBP";
    String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
    if (featureTypeStr == CUDA_CC_LBP)
    {
        return makePtr<LbpCascade_Impl>(fs);
    }
    else
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without Haar cascade support (opencv_cudalegacy is missing)");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }
}
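
// A minimal end-to-end usage sketch (illustrative only; "lbpcascade_frontalface.xml"
// and grayImage are placeholders, not names this file provides):
//
//   Ptr<cuda::CascadeClassifier> cascade =
//       cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");
//   GpuMat d_gray, d_found;
//   d_gray.upload(grayImage);                   // 8-bit single-channel host image
//   cascade->detectMultiScale(d_gray, d_found); // detections stay on the GPU
//   std::vector<Rect> found;
//   cascade->convert(d_found, found);           // download as a cv::Rect list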

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
{
    return makePtr<LbpCascade_Impl>(file);
}

#endif