1、OpenCV环境下CUDA编程示例 OpenCV环境下CUDA编程示例 在CUDA平台上对图像算法进行并行加速,是目前并行计算方面比较简单易行的一种方式;如果同时利用OpenCV提供的一些库函数,事情将会变得更加easy。以下是我个人采用的一种模板,这个模板是从OpenCV里的算法CUDA源码中挖掘出来的,我感觉用起来比较方便,所以经常采用。首先,大牛们写的源码都很鲁棒,考虑得比较全面(如大部分算法将1,3,4通道的图像同时搞定);其次,还有一个比较神奇的地方在于CPU端GpuMat和GPU端PtrStepSzb的转换,让我欲罢不能。一个不太理想的地方在于第一帧的初始化时间比较长,应该是CP
2、U到GPU的数据传输。代码中考虑了流(stream),但貌似没有真正使用。我使用的是赵开勇的CUDA_VS_Wizard,主函数还是用的cu文件。以下代码是对Vibe背景建模算法的并行实现。背景建模算法(如GMM等)是我目前接触到的易于并行的一类算法,而且加速效果不错,因为一个线程处理的数据正好对应一个像素点。代码如下:sample.cucpp view plaincopy<span style=font-size:14px;>/* * sample.cu * This is a example of the CUDA program. */ #include <stdio.h> #include &l
3、t;stdlib.h> #include <cutil_inline.h> #include <iostream> #include <string> #include opencv2/core/core.hpp #include opencv2/gpu/gpu.hpp #include opencv2/highgui/highgui.hpp #include Vibe_M_kernel.cu #include Vibe_M.h using namespace std; using namespace cv; using namespace cv:gp
4、u; enum Method FGD_STAT, MOG, MOG2, VIBE, GMG ; int main(int argc, const char* argv) cv:CommandLineParser cmd(argc, argv, c | camera | flase | use camera f | file | 768x576.avi | input video file m | method | vibe | method (fgd, mog, mog2, vibe, gmg) h | help | false | print help message ); if (cmd.
5、get<bool>(help) cout << Usage : bgfg_segm options << endl; cout << Avaible options: << endl; cmd.printParams(); return 0; bool useCamera = cmd.get<bool>(camera); string file = cmd.get<string>(file); string method = cmd.get<string>(method); if (method !
6、= fgd && method != mog && method != mog2 && method != vibe && method != gmg) cerr << Incorrect method << endl; return -1; Method m = method = fgd ? FGD_STAT : method = mog ? MOG : method = mog2 ? MOG2 : method = vibe ? VIBE : GMG; VideoCapture cap; if (use
7、Camera) cap.open(0); else cap.open(file); if (!cap.isOpened() cerr << can not open camera or video file << endl; return -1; Mat origin, frame; cap >> origin; cvtColor(origin,frame,CV_BGR2GRAY); GpuMat d_frame(frame); Vibe_M vibe; GpuMat d_fgmask; Mat fgmask; Mat fgimg; Mat bgimg; s
8、witch (m) case VIBE: vibe.initialize(d_frame); break; namedWindow(image, WINDOW_NORMAL); namedWindow(foreground mask, WINDOW_NORMAL); for(;) cap >> origin; if (origin.empty() break; cvtColor(origin,frame,CV_BGR2GRAY); d_frame.upload(frame); /update the model switch (m) case VIBE: vibe(d_frame,
9、 d_fgmask); break; d_fgmask.download(fgmask); imshow(image, frame); imshow(foreground mask, fgmask); int key = waitKey(30); if (key = 27) break; else if(key = ) cvWaitKey(0); exit(0); </span> Vibe_M.cppcpp view plaincopy<span style=font-size:14px;>#include Vibe_M.h namespace cv namespace
10、 gpu namespace device namespace vibe_m void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor); void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream); void update_gpu(PtrStepSzb frame, int cn, PtrStepSz
11、b fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream); namespace const int defaultNbSamples = 20; const int defaultReqMatches = 2; const int defaultRadius = 20; const int defaultSubsamplingFactor = 16; Vibe_M:Vibe_M(unsigned long rngSeed) : frameSize_(0, 0), rn
12、gSeed_(rngSeed) nbSamples = defaultNbSamples; reqMatches = defaultReqMatches; radius = defaultRadius; subsamplingFactor = defaultSubsamplingFactor; void Vibe_M:initialize(const GpuMat& firstFrame, Stream& s) using namespace cv:gpu:device:vibe_m; CV_Assert(firstFrame.type() = CV_8UC1 | firstF
13、rame.type() = CV_8UC3 | firstFrame.type() = CV_8UC4); /cudaStream_t stream = StreamAccessor:getStream(s); loadConstants(nbSamples, reqMatches, radius, subsamplingFactor); frameSize_ = firstFrame.size(); if (randStates_.size() != frameSize_) cv:RNG rng(rngSeed_); cv:Mat h_randStates(frameSize_, CV_8U
14、C4); rng.fill(h_randStates, cv:RNG:UNIFORM, 0, 255); randStates_.upload(h_randStates); int ch = firstFrame.channels(); int sample_ch = ch = 1 ? 1 : 4; samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch); init_gpu(firstFrame, ch, samples_, randStates_, 0); void Vibe_M:o
15、perator()(const GpuMat& frame, GpuMat& fgmask, Stream& s) using namespace cv:gpu:device:vibe_m; CV_Assert(frame.depth() = CV_8U); int ch = frame.channels(); int sample_ch = ch = 1 ? 1 : 4; if (frame.size() != frameSize_ | sample_ch != samples_.channels() initialize(frame); fgmask.create(
16、frameSize_, CV_8UC1); update_gpu(frame, ch, fgmask, samples_, randStates_, StreamAccessor:getStream(s); void Vibe_M:release() frameSize_ = Size(0, 0); randStates_.release(); samples_.release(); </span> Vibe_M.hcpp view plaincopy<span style=font-size:14px;>#ifndef _VIBE_M_H_ #define _VIBE
17、_M_H_ #ifndef SKIP_INCLUDES #include <vector> #include <memory> #include <iosfwd> #endif #include opencv2/core/core.hpp #include opencv2/core/gpumat.hpp #include opencv2/gpu/gpu.hpp #include opencv2/imgproc/imgproc.hpp #include opencv2/objdetect/objdetect.hpp #include opencv2/featu
18、res2d/features2d.hpp using namespace std; using namespace cv; using namespace cv:gpu; class Vibe_M public: /! the default constructor explicit Vibe_M(unsigned long rngSeed = 1234567); /! re-initiaization method void initialize(const GpuMat& firstFrame, Stream& stream = Stream:Null(); /! the
19、update operator void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream:Null(); /! releases all inner buffers void release(); int nbSamples; / number of samples per pixel int reqMatches; / #_min int radius; / R int subsamplingFactor; / amount of random subsampling pr
20、ivate: Size frameSize_; unsigned long rngSeed_; GpuMat randStates_; GpuMat samples_; ; #endif</span> Vibe_M.cuhtml view plaincopy<span style=font-size:14px;>#include Vibe_M.h #include opencv2/gpu/stream_accessor.hpp namespace cv namespace gpu namespace device namespace vibe_m void loadCo
21、nstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor); void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream); void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigne
22、d int> randStates, cudaStream_t stream); namespace const int defaultNbSamples = 20; const int defaultReqMatches = 2; const int defaultRadius = 20; const int defaultSubsamplingFactor = 16; Vibe_M:Vibe_M(unsigned long rngSeed) : frameSize_(0, 0), rngSeed_(rngSeed) nbSamples = defaultNbSamples; reqM
23、atches = defaultReqMatches; radius = defaultRadius; subsamplingFactor = defaultSubsamplingFactor; void Vibe_M:initialize(const GpuMat& firstFrame, Stream& s) using namespace cv:gpu:device:vibe_m; CV_Assert(firstFrame.type() = CV_8UC1 | firstFrame.type() = CV_8UC3 | firstFrame.type() = CV_8UC
24、4); cudaStream_t stream = cv:gpu:StreamAccessor:getStream(s); loadConstants(nbSamples, reqMatches, radius, subsamplingFactor); frameSize_ = firstFrame.size(); if (randStates_.size() != frameSize_) cv:RNG rng(rngSeed_); cv:Mat h_randStates(frameSize_, CV_8UC4); rng.fill(h_randStates, cv:RNG:UNIFORM,
25、0, 255); randStates_.upload(h_randStates); int ch = firstFrame.channels(); int sample_ch = ch = 1 ? 1 : 4; samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch); init_gpu(firstFrame, ch, samples_, randStates_, stream); void Vibe_M:operator()(const GpuMat& frame, GpuM
26、at& fgmask, Stream& s) using namespace cv:gpu:device:vibe_m; CV_Assert(frame.depth() = CV_8U); int ch = frame.channels(); int sample_ch = ch = 1 ? 1 : 4; if (frame.size() != frameSize_ | sample_ch != samples_.channels() initialize(frame); fgmask.create(frameSize_, CV_8UC1); update_gpu(frame,
27、 ch, fgmask, samples_, randStates_, cv:gpu:StreamAccessor:getStream(s); void Vibe_M:release() frameSize_ = Size(0, 0); randStates_.release(); samples_.release(); </span> Vibe_M_kernel.cucpp view plaincopy<span style=font-size:14px;>#include opencv2/gpu/device/common.hpp #include opencv2/
28、gpu/device/vec_math.hpp namespace cv namespace gpu namespace device namespace vibe_m _constant_ int c_nbSamples; _constant_ int c_reqMatches; _constant_ int c_radius; _constant_ int c_subsamplingFactor; void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor) cudaSafeCall
29、( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int) ); cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int) ); cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int) ); cudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(
30、int) ); _device_ _forceinline_ uint nextRand(uint& state) /const unsigned int CV_RNG_COEFF = 4164903690U;/已经定义 state = state * CV_RNG_COEFF + (state >> 16); return state; _constant_ int c_xoff9 = -1, 0, 1, -1, 1, -1, 0, 1, 0; _constant_ int c_yoff9 = -1, -1, -1, 0, 0, 1, 1, 1, 0; _device_
31、_forceinline_ int2 chooseRandomNeighbor(int x, int y, uint& randState, int count = 8) int idx = nextRand(randState) % count; return make_int2(x + c_xoffidx, y + c_yoffidx); _device_ _forceinline_ uchar cvt(uchar val) return val; _device_ _forceinline_ uchar4 cvt(const uchar3& val) return make_uchar4(val.x, val.y, val.z, 0); _device_ _forceinline_ uchar4 cvt(const uchar4& val) return val; template <typename SrcT,
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1