opencvHOG算法注释.docx

资源描述

opencvHOG算法注释.docx

《opencvHOG算法注释.docx》由会员分享，可在线阅读，更多相关《opencvHOG算法注释.docx（43页珍藏版）》请在冰豆网上搜索。

opencvHOG算法注释.docx

opencvHOG算法注释

#include"_cvaux.h"

/*****************************************************************************************

structCV_EXPORTSHOGDescriptor

{

public:

enum{L2Hys=0};

HOGDescriptor（）:

winSize（64,128）,blockSize（16,16）,blockStride（8,8）,

cellSize（8,8）,nbins（9）,derivAperture

（1）,winSigma（-1）,

histogramNormType（L2Hys）,L2HysThreshold（0.2）,gammaCorrection（true）

{}

HOGDescriptor（Size_winSize,Size_blockSize,Size_blockStride,

Size_cellSize,int_nbins,int_derivAperture=1,double_winSigma=-1,

int_histogramNormType=L2Hys,double_L2HysThreshold=0.2,bool_gammaCorrection=false）

winSize（_winSize）,blockSize（_blockSize）,blockStride（_blockStride）,cellSize（_cellSize）,

nbins（_nbins）,derivAperture（_derivAperture）,winSigma（_winSigma）,

histogramNormType（_histogramNormType）,L2HysThreshold（_L2HysThreshold）,

gammaCorrection（_gammaCorrection）

{}

HOGDescriptor（constString&filename）

{

load（filename）;

}

virtual~HOGDescriptor（）{}

size_tgetDescriptorSize（）const;

boolcheckDetectorSize（）const;

doublegetWinSigma（）const;

virtualvoidsetSVMDetector（constvector&_svmdetector）;

virtualboolload（constString&filename,constString&objname=String（））;

virtualvoidsave（constString&filename,constString&objname=String（））const;

virtualvoidcompute（constMat&img,

vector&descriptors,

SizewinStride=Size（）,Sizepadding=Size（）,

constvector&locations=vector（））const;

virtualvoiddetect（constMat&img,vector&foundLocations,

doublehitThreshold=0,SizewinStride=Size（）,

Sizepadding=Size（）,

constvector&searchLocations=vector（））const;

virtualvoiddetectMultiScale（constMat&img,vector&foundLocations,

doublehitThreshold=0,SizewinStride=Size（）,

Sizepadding=Size（）,doublescale=1.05,

intgroupThreshold=2）const;

//Mat&angleOfs,与后文Mat&qangle不一致，怀疑是笔误，由于qangle与angleOfs有不同含义，尽量改过来

virtualvoidcomputeGradient（constMat&img,Mat&grad,Mat&angleOfs,

SizepaddingTL=Size（）,SizepaddingBR=Size（））const;

staticvectorgetDefaultPeopleDetector（）;

SizewinSize;//窗口大小

SizeblockSize;//Block大小

SizeblockStride;//block每次移动宽度包括水平和垂直两个方向

SizecellSize;//Cell单元大小

intnbins;//直方图bin数目

intderivAperture;//不知道什么用

doublewinSigma;//高斯函数的方差

inthistogramNormType;//直方图归一化类型，具体见论文

doubleL2HysThreshold;//L2Hys化中限制最大值为0.2

boolgammaCorrection;//是否Gamma校正

vectorsvmDetector;//检测算子

};

**********************************************************************************/

namespacecv

{

size_tHOGDescriptor:

getDescriptorSize（）const

{

//检测数据的合理性

CV_Assert（blockSize.width%cellSize.width==0&&

blockSize.height%cellSize.height==0）;

CV_Assert（（winSize.width-blockSize.width）%blockStride.width==0&&

（winSize.height-blockSize.height）%blockStride.height==0）;

//Descriptor的大小

return（size_t）nbins*

（blockSize.width/cellSize.width）*

（blockSize.height/cellSize.height）*

（（winSize.width-blockSize.width）/blockStride.width+1）*

（（winSize.height-blockSize.height）/blockStride.height+1）;

//9*（16/8）*（16/8）*（（64-16）/8+1）*（（128-16）/8+1）=9*2*2*7*15=3780，实际上的检测算子为3781，多的1表示偏置

}

doubleHOGDescriptor:

getWinSigma（）const

{

//winSigma默认为-1,然而有下式知，实际上为4；否则自己选择参数

returnwinSigma>=0?

winSigma:

（blockSize.width+blockSize.height）/8.;

}

boolHOGDescriptor:

checkDetectorSize（）const

{

//size_t:

unsignedint

size_tdetectorSize=svmDetector.size（）,descriptorSize=getDescriptorSize（）;

//三种情况任意一种为true则表达式为true，实际上是最后一种

returndetectorSize==0||

detectorSize==descriptorSize||

detectorSize==descriptorSize+1;

}

voidHOGDescriptor:

setSVMDetector（constvector&_svmDetector）

{

svmDetector=_svmDetector;

CV_Assert（checkDetectorSize（））;

}

boolHOGDescriptor:

load（constString&filename,constString&objname）

{

//XML/YML文件存储

FileStoragefs（filename,FileStorage:

READ）;

//objname为空，！

1=0，选择fs.getFirstTopLevelNode（）;否则为fs[objname]

//注意到FileStorage中[]重载了：

FileNodeoperator[]（conststring&nodename）（returnsthetop-levelnodebyname ）

FileNodeobj=!

objname.empty（）?

fs[objname]:

fs.getFirstTopLevelNode（）;

if（!

obj.isMap（））

returnfalse;

FileNodeIteratorit=obj["winSize"].begin（）;

it>>winSize.width>>winSize.height;

it=obj["blockSize"].begin（）;

it>>blockSize.width>>blockSize.height;

it=obj["blockStride"].begin（）;

it>>blockStride.width>>blockStride.height;

it=obj["cellSize"].begin（）;

it>>cellSize.width>>cellSize.height;

obj["nbins"]>>nbins;

obj["derivAperture"]>>derivAperture;

obj["winSigma"]>>winSigma;

obj["histogramNormType"]>>histogramNormType;

obj["L2HysThreshold"]>>L2HysThreshold;

obj["gammaCorrection"]>>gammaCorrection;

FileNodevecNode=obj["SVMDetector"];

if（vecNode.isSeq（））

{

vecNode>>svmDetector;

CV_Assert（checkDetectorSize（））;

}

returntrue;

}

voidHOGDescriptor:

save（constString&filename,constString&objName）const

{

FileStoragefs（filename,FileStorage:

WRITE）;

//空的对象名则取默认名，输出有一定格式，对象名后紧接{

fs<<（!

objName.empty（）?

objName:

FileStorage:

getDefaultObjectName（filename））<<"{";

//之后依次为：

fs<<"winSize"<

<<"blockSize"<

<<"blockStride"<

<<"cellSize"<

<<"nbins"<

<<"derivAperture"<

<<"winSigma"<

<<"histogramNormType"<

<<"L2HysThreshold"<

<<"gammaCorrection"<

if（!

svmDetector.empty（））

fs<<"SVMDetector"<<"[:

//注意还要输出"}"

fs<<"}";

}

//img: 原始图像
//grad: 记录每个像素所属bin对应的权重的矩阵，为幅值乘以权值
//      这个权值是关键，也很复杂：包括高斯权重、三线性插值的权重；在本函数中先只考虑幅值和相邻bin间的插值权重
//qangle: 记录每个像素角度所属的bin序号的矩阵
//        （grad与qangle均为2通道，为了在相邻bin之间做线性插值）
//paddingTL: Top和Left方向扩充的像素数
//paddingBR: Bottom和Right方向扩充的像素数
//功能：计算img经扩充后的图像中每个像素的梯度和角度

voidHOGDescriptor:

computeGradient（constMat&img,Mat&grad,Mat&qangle,

SizepaddingTL,SizepaddingBR）const

{

//先判断是否为单通道的灰度或者3通道的图像

CV_Assert（img.type（）==CV_8U||img.type（）==CV_8UC3）;

//计算gradient的图的大小,由64*128==》112*160，则会产生5*7=35个窗口（windowstride:

8）

//每个窗口105个block,105*36=3780维特征向量

//paddingTL.width=16,paddingTL.height=24

Sizegradsize（img.cols+paddingTL.width+paddingBR.width,

img.rows+paddingTL.height+paddingBR.height）;

//注意grad和qangle是2通道的矩阵，为3D-trilinear插值中的orientation维度，另两维为坐标x与y

grad.create（gradsize,CV_32FC2）;//

qangle.create（gradsize,CV_8UC2）;//[0..nbins-1]-quantizedgradientorientation

//wholeSize为parentmatrix大小，不是扩展后gradsize的大小

//roiofs即为img在parentmatrix中的偏置

//对于正样本img=parentmatrix;但对于负样本img是从parentimg中抽取的10个随机位置

//至于OpenCv具体是怎么操作，使得img和parentimg相联系，不是很了解

//wholeSize与roiofs仅在padding时有用，可以不管，就认为传入的img==parentimg，是否是从parentimg中取出无所谓

SizewholeSize;

Pointroiofs;

img.locateROI（wholeSize,roiofs）;

inti,x,y;

intcn=img.channels（）;

//产生1行256列的向量，lut为列向量头地址

Mat__lut（1,256）;

constfloat*lut=&_lut（0,0）;

//gamma校正，作者的编程思路很有意思

//初看不知道这怎么会与图像的gamma校正有关系，压根img都没出现，看到后面大家会豁然开朗的

if（gammaCorrection）

for（i=0;i<256;i++）

_lut（0,i）=std:

sqrt（（float）i）;

else

for（i=0;i<256;i++）

_lut（0,i）=（float）i;

//开辟空间存xmap和ymap，其中各占gradsize.width+2和gradsize.height+2空间

//+2是为了计算dx,dy时用[-1,0,1]算子,即使在扩充图像中，其边缘计算梯度时还是要再额外加一个像素的

//作者很喜欢直接用内存地址及之间的关系，初看是有点头大的

//另外再说说xmap与ymap的作用：

其引入是因为img图像需要扩充到gradsize大小

//如果我们计算img中位于（-5，-6）像素时，需要将基于img的（-5,-6）坐标，映射为基于grad和qangle的坐标（xmap,ymap）

AutoBuffermapbuf（gradsize.width+gradsize.height+4）;

int*xmap=（int*）mapbuf+1;

int*ymap=xmap+gradsize.width+2;

//BORDER_REFLECT_101:

（左插值）gfedcb|abcdefgh（原始像素）|gfedcba（右插值）,一种插值模式 constintborderType=（int）BORDER_REFLECT_101;

//borderInterpolate函数完成两项操作，一是利用插值扩充img，二是返回x-paddingTL.width+roiofs.x映射后的坐标xmap

//例如，ximg=x（取0）-paddingTL.width（取24）+roiofs.x（取0）=-24==>xmap[0]=0

//即img中x=-24,映射到grad中xmap=0,并且存在xmap[0]中,至于borderInterpolate的具体操作可以不必细究

for（x=-1;x

xmap[x]=borderInterpolate（x-paddingTL.width+roiofs.x,

wholeSize.width,borderType）;

for（y=-1;y

ymap[y]=borderInterpolate（y-paddingTL.height+roiofs.y,

wholeSize.height,borderType）;

//x-&y-derivativesforthewholerow

//由于后面的循环是以行为单位，每次循环内存重复使用，所以只要记录一行的信息而不是整个矩阵

intwidth=gradsize.width;

AutoBuffer_dbuf（width*4）;

float*dbuf=_dbuf;

//注意到内存的连续性方便之后的编程

MatDx（1,width,CV_32F,dbuf）;

MatDy（1,width,CV_32F,dbuf+width）;

MatMag（1,width,CV_32F,dbuf+width*2）;

MatAngle（1,width,CV_32F,dbuf+width*3）;

int_nbins=nbins;

floatangleScale=（float）（_nbins/CV_PI）;//9/pi

for（y=0;y

{

//指向每行的第一个元素,img.data为矩阵的第一个元素地址

constuchar*imgPtr=img.data+img.step*ymap[y];

constuchar*prevPtr=img.data+img.step*ymap[y-1];

constuchar*nextPtr=img.data+img.step*ymap[y+1];

float*gradPtr=（float*）grad.ptr（y）;

uchar*qanglePtr=（uchar*）qangle.ptr（y）;

//1通道

if（cn==1）

{

for（x=0;x

{

intx1=xmap[x];

//imgPtr指向img第y行首元素，imgPtr[x]即表示第（x,y）像素，其亮度值位于0~255，对应lut[0]~lut[255]

//即若像素亮度为120，则对应lut[120]，若有gamma校正，lut[120]=sqrt（120）

//由于补充了虚拟像素，即在imgPtr[-1]无法表示gradsize中-1位置元素，而需要有个转换

//imgPtr[-1-paddingTL.width+roiofs.x],即imgPtr[xmap[-1]]，即gradsize中-1位置元素为img中xmap[-1]位置的元素

dbuf[x]=（float）（lut[imgPtr[xmap[x+1]]]-lut[imgPtr[xmap[x-1]]]）;

//由于内存的连续性，隔width,即存Dy

dbuf[width+x]=（float）（lut[nextPtr[x1]]-lut[prevPtr[x1]]）;

}

else

//3通道,3通道中取最大值

{

for（x=0;x

{

intx1=xmap[x]*3;

constuchar*p2=imgPtr+xmap[x+1]*3;

constuchar*p0=imgPtr+xmap[x-1]*3;

floatdx0,dy0,dx,dy,mag0,mag;

dx0=lut[p2[2]]-lut[p0[2]];

dy0=lut[nextPtr[x1+2]]-lut[prevPtr[x1+2]];

mag0=dx0*dx0+dy0*dy0;

dx=lut[p2[1]]-lut[p0[1]];

dy=lut[nextPtr[x1+1]]-lut[prevPtr[x1+1]];

mag=dx*dx+dy*dy;

if（mag0

{

dx0=dx;

dy0=dy;

mag0=mag;

}

dx=lut[p2[0]]-lut[p0[0]];

dy=lut[nextPtr[x1]]-lut[prevPtr[x1]];

展开阅读全文