OpenCV环境下CUDA编程示例.docx
《OpenCV环境下CUDA编程示例.docx》由会员分享,可在线阅读,更多相关《OpenCV环境下CUDA编程示例.docx(7页珍藏版)》请在冰豆网上搜索。
OpenCV环境下CUDA编程示例
OpenCV环境下CUDA编程示例
在CUDA平台上对图像算法进行并行加速,是目前并行计算方面比较简单易行的一种方式;如果同时利用OpenCV提供的一些库函数,事情将会变得更加easy。
以下是我个人采用的一种模板。这个模板是从OpenCV里算法的CUDA源码中挖掘出来的,我感觉用起来比较方便,所以经常采用。
首先,大牛们写的源码都很鲁棒,考虑得比较全面(如大部分算法将1、3、4通道的图像同时搞定)。感觉还有一个比较神奇的地方在于CPU端GpuMat和GPU端PtrStepSzb之间的转换,让我欲罢不能。一个不太理想的地方在于第一帧的初始化时间比较长,应该是CPU到GPU的数据传输造成的。
代码中预留了CUDA流(Stream)参数,但貌似实际并没有使用。
我使用的是赵开勇的CUDA_VS_Wizard,主函数还是用的cu文件。
以下代码是对Vibe背景建模算法的并行实现。背景建模算法(如GMM等)是我目前接触到的易于并行的一类算法,而且加速效果不错,因为每个线程处理的数据恰好对应一个像素点。
代码如下:
sample.cu
[cpp]viewplaincopy<spanstyle="font-size:
14px;">/*********************************************************************sample.cu*ThisisaexampleoftheCUDAprogram.*********************************************************************/#include<stdio.h>#include<stdlib.h>#include<cutil_inline.h>#include<iostream>#include<string>#include"opencv2/core/core.hpp"#include"opencv2/gpu/gpu.hpp"#include"opencv2/highgui/highgui.hpp"#include"Vibe_M_kernel.cu"#include"Vibe_M.h"usingnamespacestd;usingnamespacecv;usingnamespacecv:
:
gpu;enumMethod{FGD_STAT,MOG,MOG2,VIBE,GMG};intmain(intargc,constchar**argv){cv:
:
CommandLineParsercmd(argc,argv,"{c|camera|flase|usecamera}""{f|file|768x576.avi|inputvideofile}""{m|method|vibe|method(fgd,mog,mog2,vibe,gmg)}""{h|help|false|printhelpmessage}");if(cmd.get<bool>("help")){cout<<"Usage:
bgfg_segm[options]"<<endl;cout<<"Avaibleoptions:
"<<endl;cmd.printParams();return0;}booluseCamera=cmd.get<bool>("camera");stringfile=cmd.get<string>("file");stringmethod=cmd.get<string>("method");if(method!
="fgd"&&method!
="mog"&&method!
="mog2"&&method!
="vibe"&&method!
="gmg"){cerr<<"Incorrectmethod"<<endl;return-1;}Methodm=method=="fgd"?
FGD_STAT:
method=="mog"?
MOG:
method=="mog2"?
MOG2:
method=="vibe"?
VIBE:
GMG;VideoCapturecap;if(useCamera)cap.open(0);elsecap.open(file);if(!
cap.isOpened()){cerr<<"cannotopencameraorvideofile"<<endl;return-1;}Matorigin,frame;cap>>origin;cvtColor(origin,frame,CV_BGR2GRAY);GpuMatd_frame(frame);Vibe_Mvibe;GpuMatd_fgmask;Matfgmask;Matfgimg;Matbgimg;switch(m){caseVIBE:
vibe.initialize(d_frame);break;}namedWindow("image",WINDOW_NORMAL);namedWindow("foregroundmask",WINDOW_NORMAL);for(;;){cap>>origin;if(origin.empty())break;cvtColor(origin,frame,CV_BGR2GRAY);d_frame.upload(frame);//updatethemodelswitch(m){caseVIBE:
vibe(d_frame,d_fgmask);break;}d_fgmask.download(fgmask);imshow("image",frame);imshow("foregroundmask",fgmask);intkey=waitKey(30);if(key==27)break;elseif(key==''){cvWaitKey(0);}}exit(0);}</span>Vibe_M.cpp
[cpp]viewplaincopy<spanstyle="font-size:
14px;">#include"Vibe_M.h"namespacecv{namespacegpu{namespacedevice{namespacevibe_m{voidloadConstants(intnbSamples,intreqMatches,intradius,intsubsamplingFactor);voidinit_gpu(PtrStepSzbframe,intcn,PtrStepSzbsamples,PtrStepSz<unsignedint>randStates,cudaStream_tstream);voidupdate_gpu(PtrStepSzbframe,intcn,PtrStepSzbfgmask,PtrStepSzbsamples,PtrStepSz<unsignedint>randStates,cudaStream_tstream);}}}}namespace{constintdefaultNbSamples=20;constintdefaultReqMatches=2;constintdefaultRadius=20;constintdefaultSubsamplingFactor=16;}Vibe_M:
:
Vibe_M(unsignedlongrngSeed):
frameSize_(0,0),rngSeed_(rngSeed){nbSamples=defaultNbSamples;reqMatches=defaultReqMatches;radius=defaultRadius;subsamplingFactor=defaultSubsamplingFactor;}voidVibe_M:
:
initialize(constGpuMat&firstFrame,Stream&s){usingnamespacecv:
:
gpu:
:
device:
:
vibe_m;CV_Assert(firstFrame.type()==CV_8UC1||firstFrame.type()==CV_8UC3||firstFrame.type()==CV_8UC4);//cudaStream_tstream=StreamAccessor:
:
getStream(s);loadConstants(nbSamples,reqMatches,radius,subsamplingFactor);frameSize_=firstFrame.size();if(randStates_.size()!
=frameSize_){cv:
:
RNGrng(rngSeed_);cv:
:
Math_randStates(frameSize_,CV_8UC4);rng.fill(h_randStates,cv:
:
RNG:
:
UNIFORM,0,255);randStates_.upload(h_randStates);}intch=firstFrame.channels();intsample_ch=ch==1?
1:
4;samples_.create(nbSamples*frameSize_.height,frameSize_.width,CV_8UC(sample_ch));init_gpu(firstFrame,ch,samples_,randStates_,0);}voidVibe_M:
:
operator()(constGpuMat&frame,GpuMat&fgmask,Stream&s){usingnamespacecv:
:
gpu:
:
device:
:
vibe_m;CV_Assert(frame.depth()==CV_8U);intch=frame.channels();intsample_ch=ch==1?
1:
4;if(frame.size()!
=frameSize_||sample_ch!
=samples_.channels())initialize(frame);fgmask.create(frameSize_,CV_8UC1);update_gpu(frame,ch,fgmask,samples_,randStates_,StreamAccessor:
:
getStream(s));}voidVibe_M:
:
release(){frameSize_=Size(0,0);randStates_.release();samples_.release();}</span>Vibe_M.h
[cpp]viewplaincopy<spanstyle="font-size:
14px;">#ifndef_VIBE_M_H_#define_VIBE_M_H_#ifndefSKIP_INCLUDES#include<vector>#include<memory>#include<iosfwd>#endif#include"opencv2/core/core.hpp"#include"opencv2/core/gpumat.hpp"#include"opencv2/gpu/gpu.hpp"#include"opencv2/imgproc/imgproc.hpp"#include"opencv2/objdetect/objdetect.hpp"#include"opencv2/features2d/features2d.hpp"usingnamespacestd;usingnamespacecv;usingnamespacecv:
:
gpu;classVibe_M{public:
//!
thedefaultconstructorexplicitVibe_M(unsignedlongrngSeed=1234567);//!
re-initiaizationmethodvoidinitialize(constGpuMat&firstFrame,Stream&stream=Stream:
:
Null());//!
theupdateoperatorvoidoperator()(constGpuMat&frame,GpuMat&fgmask,Stream&stream=Stream:
:
Null());//!
releasesallinnerbuffersvoidrelease();intnbSamples;//numberofsamplesperpixelintreqMatches;//#_minintradius;//RintsubsamplingFactor;//amountofrandomsubsamplingprivate:
SizeframeSize_;unsignedlongrngSeed_;GpuMatrandStates_;GpuMatsamples_;};#endif</span>Vibe_M.cu
[html]viewplaincopy<spanstyle="font-size:
14px;">#include"Vibe_M.h"#include"opencv2/gpu/stream_accessor.hpp"namespacecv{namespacegpu{namespacedevice{namespacevibe_m{voidloadConstants(intnbSamples,intreqMatches,intradius,intsubsamplingFactor);voidinit_gpu(PtrStepSzbframe,intcn,PtrStepSzbsamples,PtrStepSz<unsignedint>randStates,cudaStream_tstream);voidupdate_gpu(PtrStepSzbframe,intcn,PtrStepSzbfgmask,PtrStepSzbsamples,PtrStepSz<unsignedint>randStates,cudaStream_tstream);}}}}namespace{constintdefaultNbSamples=20;constintdefaultReqMatches=2;constintdefaultRadius=20;constintdefaultSubsamplingFactor=16;}Vibe_M:
:
Vibe_M(unsignedlongrngSeed):
frameSize_(0,0),rngSeed_(rngSeed){nbSamples=defaultNbSamples;reqMatches=defaultReqMatches;radius=defaultRadius;subsamplingFactor=defaultSubsamplingFactor;}voidVibe_M:
:
initialize(constGpuMat&firstFrame,Stream&s){usingnamespacecv:
:
gpu:
:
device:
:
vibe_m;CV_Assert(firstFrame.type()==CV_8UC1||firstFrame.type()==CV_8UC3||firstFrame.type()==CV_8UC4);cudaStream_tstream=cv:
:
gpu:
:
StreamAccessor:
:
getStream(s);loadConstants(nbSamples,reqMatches,radius,subsamplingFactor);frameSize_=firstFrame.size();if(randStates_.size()!
=frameSize_){cv:
:
RNGrng(rngSeed_);cv:
:
Math_randStates(frameSize_,CV_8UC4);rng.fill(h_randStates,cv:
:
RNG:
:
UNIFORM,0,255);randStates_.upload(h_randStates);}intch=firstFrame.channels();intsample_ch=ch==1?
1:
4;samples_.create(nbSamples*frameSize_.height,frameSize_.width,CV_8UC(sample_ch));init_gpu(firstFrame,ch,samples_,randStates_,stream);}voidVibe_M:
:
operator()(constGpuMat&frame,GpuMat&fgmask,Stream&s){usingnamespacecv:
:
gpu:
:
device:
:
vibe_m;CV_Assert(frame.depth()==CV_8U);intch=frame.channels();intsample_ch=ch==1?
1:
4;if(frame.size()!
=frameSize_||sample_ch!
=samples_.channels())initialize(frame);fgmask.create(frameSize_,CV_8UC1);update_gpu(frame,ch,fgmask,samples_,randStates_,cv:
:
gpu:
:
StreamAccessor:
:
getStream(s));}voidVibe_M:
:
release(){frameSize_=Size(0,0);randStates_.release();samples_.release();}</span>Vibe_M_kernel.cu
[cpp]viewplaincopy<spanstyle="font-size:
14px;">#include"opencv2/gpu/device/common.hpp"#include"opencv2/gpu/device/vec_math.hpp"namespacecv{namespacegpu{namespacedevice{namespacevibe_m{__constant__intc_nbSamples;__constant__intc_reqMatches;__constant__intc_radius;__constant__intc_subsamplingFactor;voidloadConstants(intnbSamples,intreqMatches,intradius,intsubsamplingFactor){cudaSafeCall(cudaMemcpyToSymbol(c_nbSamples,&nbSamples,sizeof(int)));cudaSafeCall(cudaMemcpyToSymbol(c_reqMatches,&reqMatches,sizeof(int)));cudaSafeCall(cudaMemcpyToSymbol(c_radius,&radius,sizeof(int)));cudaSafeCall(cudaMemcpyToSymbol(c_subsamplingFactor,&subsamplingFactor,sizeof(int)));}__device____forceinline__uintnextRand(uint&state){//constunsignedintCV_RNG_COEFF=4164903690U;//已经定义state=state*CV_RNG_COEFF+(state>>16);returnstate;}__constant__intc_xoff[9]={-1,0,1,-1,1,-1,0,1,0};__constant__intc_yoff[9]={-1,-1,-1,0,0,1,1,1,0};__device____forceinline__int2chooseRandomNeighbor(intx,inty,uint&randState,intcount=8){intidx=nextRand(randState)%count;returnmake_int2(x+c_xoff[idx],y+c_yoff[idx]);}__device____forceinline__ucharcvt(ucharval){returnval;}__device____forceinline__uchar4cvt(constuchar3&val){returnmake_uchar4(val.x,val.y,val.z,0);}__device____forceinline__uchar4cvt(constuchar4&val){returnval;}template<typenameSrcT,