SIFT算法实现理解及注释详解基于RobHess源码.docx

资源描述

SIFT算法实现理解及注释详解基于RobHess源码.docx

《SIFT算法实现理解及注释详解基于RobHess源码.docx》由会员分享，可在线阅读，更多相关《SIFT算法实现理解及注释详解基于RobHess源码.docx（37页珍藏版）》请在冰豆网上搜索。

SIFT算法实现理解及注释详解基于RobHess源码.docx

SIFT算法实现理解及注释详解基于RobHess源码

RobHess的SIFT算法实现理解及注释

SIFT算法不用我多解释了，这是一个很强大的算法，主要用于图像配准和物体识别等领域，但是其计算量相对也比较大。性价比比较高的算法包括PCA-SIFT和SURF，其中OpenCV提供了SURF算法。

但是为了方便理解，这里结合我自己的理解，给出RobHess所实现的SIFT算法的代码以及注释。如果您有关于SIFT算法不理解的地方，咱们可以一起交流一下；

或者您认为哪里讲得不详细，也请提出来。

SIFT算法的主要实现在sift.c这个文件，其主要流程为：

（1）首先创建初始图像，即先将图像转换为32位的灰度图，然后（在img_dbl为真时）使用三次插值将图像放大为原来的两倍，之后进行高斯模糊处理

（2）在此基础上进行高斯金字塔的构建以及高斯差分金字塔的构建

（3）对图像进行极值点检测

（4）计算特征向量的尺度

（5）如果初始图像被放大过（img_dbl为真），则对特征做相应调整（adjust_for_img_dbl）

（6）计算特征的方向

（7）计算描述子，其中包括计算二维方向直方图并转换直方图为特征描述子

首先给出sift算法的整体框架代码：

输入参数：

img为输入图像；

feat为所要提取的特征指针；

intvls指的是每组（octave）采样的层数；据此高斯金字塔每组实际有intvls+3层，差分金字塔每组有intvls+2层；

sigma指的是图像初始化过程中高斯模糊所使用的参数；

contr_thr是归一化之后的去除不稳定特征的阈值；

curv_thr指的是去除边缘的特征的主曲率阈值；

img_dbl表示是否将图像放大为之前的两倍；

descr_width是用来计算特征描述子的方向直方图的宽度；

descr_hist_bins是直方图中的条数。

1.int _sift_features（ IplImage* img, struct feature** feat, int intvls,

2. double sigma, double contr_thr, int curv_thr,

3. int img_dbl, int descr_width, int descr_hist_bins ）

4.{

5. IplImage* init_img;

6. IplImage*** gauss_pyr, *** dog_pyr;

7. CvMemStorage* storage;

8. CvSeq* features;

9. int octvs, i, n = 0;

10.

11. /* check arguments */

12. if（ !

img ）

13. fatal_error（ "NULL pointer error, %s, line %d", __FILE__, __LINE__ ）;

14.

15. if（ !

feat ）

16. fatal_error（ "NULL pointer error, %s, line %d", __FILE__, __LINE__ ）;

17.

18. /* build scale space pyramid; smallest dimension of top level is ~4 pixels */

19. /* 构建高斯尺度空间金字塔，顶层最小的为4像素 */

20. init_img = create_init_img（ img, img_dbl, sigma ）;

21. octvs = log（ double MIN（ init_img->width, init_img->height ）） / log（2.0） - 2;

22. //构建高斯金字塔和高斯差分金字塔

23. gauss_pyr = build_gauss_pyr（ init_img, octvs, intvls, sigma ）;

24. dog_pyr = build_dog_pyr（ gauss_pyr, octvs, intvls ）;

25.

26. storage = cvCreateMemStorage（ 0 ）;

27.

28. //尺度空间极值点检测

29. features = scale_space_extrema（ dog_pyr, octvs, intvls, contr_thr,

30. curv_thr, storage ）;

31.

32. //画出去除低对比度的极值点

33. //draw_extrempoint（img , features）;

34.

35.

36.

37.

38. //计算特征向量的尺度

39. calc_feature_scales（ features, sigma, intvls ）;

40. if（ img_dbl ）

41. adjust_for_img_dbl（ features ）;

42. //计算特征的方向

43. calc_feature_oris（ features, gauss_pyr ）;

44. //计算描述子，包括计算二维方向直方图和转换其为特征描述子

45. compute_descriptors（ features, gauss_pyr, descr_width, descr_hist_bins ）;

46.

47. /* sort features by decreasing scale and move from CvSeq to array */

48. cvSeqSort（ features, （CvCmpFunc）feature_cmp, NULL ）;

49. n = features->total;

50. *feat = static_cast<struct feature*>（ calloc（ n, sizeof（struct feature）））;

51. *feat = static_cast<struct feature*>（ cvCvtSeqToArray（ features, *feat, CV_WHOLE_SEQ ））;

52.

53.

54.

55.

56. for（ i = 0; i < n; i++ ）

57. {

58. free（（*feat）[i].feature_data ）;

59. （*feat）[i].feature_data = NULL;

60. }

61.

62. cvReleaseMemStorage（ &storage ）;

63. cvReleaseImage（ &init_img ）;

64. release_pyr（ &gauss_pyr, octvs, intvls + 3 ）;

65. release_pyr（ &dog_pyr, octvs, intvls + 2 ）;

66. return n;

67.}

（1）初始化图像

输入参数：

这里不需要解释了

该函数主要用来初始化图像，转换图像为32位灰度图以及进行高斯模糊。

1.static IplImage* create_init_img（ IplImage* img, int img_dbl, double sigma ）

2.{

3. IplImage* gray, * dbl;

4. float sig_diff;

6. gray = convert_to_gray32（ img ）;

7. if（ img_dbl ）

8. {

9. sig_diff = sqrt（ sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4 ）;

10. dbl = cvCreateImage（ cvSize（ img->width*2, img->height*2 ）,

11. IPL_DEPTH_32F, 1 ）;

12. cvResize（ gray, dbl, CV_INTER_CUBIC ）;

13. cvSmooth（ dbl, dbl, CV_GAUSSIAN, 0, 0, sig_diff, sig_diff ）;

14. cvReleaseImage（ &gray ）;

15. return dbl;

16. }

17. else

18. {

19. sig_diff = sqrt（ sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA ）;

20. cvSmooth（ gray, gray, CV_GAUSSIAN, 0, 0, sig_diff, sig_diff ）;

21. return gray;

22. }

23.}

（2）构建高斯金字塔

输入参数：

octvs是高斯金字塔的组数

intvls是高斯金字塔每组的层数参数（每组实际生成intvls+3层）

sigma是初始的高斯模糊参数，后续也通过它计算每一层所使用的sigma

1.static IplImage*** build_gauss_pyr（ IplImage* base, int octvs,int intvls, double sigma ）

2.{

3. IplImage*** gauss_pyr;

4. double* sig = static_cast<double*>（ calloc（ intvls + 3, sizeof（double）））;

5. double sig_total, sig_prev, k;

6. int i, o;

8. gauss_pyr = static_cast<IplImage***>（ calloc（ octvs, sizeof（ IplImage** ）））;

9. for（ i = 0; i < octvs; i++ ）

10. gauss_pyr[i] = static_cast<IplImage**>（ calloc（ intvls + 3, sizeof（ IplImage* ）））;

11.

12. /*

13. precompute Gaussian sigmas using the following formula:

14. 预计算每次高斯模糊的sigma

15.

16. \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2

17. */

18. sig[0] = sigma;

19. k = pow（ 2.0, 1.0 / intvls ）;

20. for（ i = 1; i < intvls + 3; i++ ）

21. {

22. sig_prev = pow（ k, i - 1 ） * sigma;

23. sig_total = sig_prev * k;

24. sig[i] = sqrt（ sig_total * sig_total - sig_prev * sig_prev ）;

25. }

26.

27.

28. for（ o = 0; o < octvs; o++ ）

29. for（ i = 0; i < intvls + 3; i++ ）

30. {

31. //对每一层进行降采样，形成高斯金字塔的每一层

32. if（ o == 0 && i == 0 ）

33. gauss_pyr[o][i] = cvCloneImage（base）;

34.

35. /* base of new octvave is halved image from end of previous octave */

36. //每一组的第一层都是通过对前面一组的最上面一层的降采样实现的

37. else if（ i == 0 ）

38. gauss_pyr[o][i] = downsample（ gauss_pyr[o-1][intvls] ）;

39.

40. /* blur the current octave's last image to create the next one */

41. //每一组的其他层则使通过使用不同sigma的高斯模糊来进行处理

42. else

43. {

44. gauss_pyr[o][i] = cvCreateImage（ cvGetSize（gauss_pyr[o][i-1]）,

45. IPL_DEPTH_32F, 1 ）;

46. cvSmooth（ gauss_pyr[o][i-1], gauss_pyr[o][i],

47. CV_GAUSSIAN, 0, 0, sig[i], sig[i] ）;

48. }

49. }

50.

51. free（ sig ）;

52. return gauss_pyr;

53.}

降采样处理

输入参数：

不解释

这就是降采样，其实就是通过最近邻插值将图像的宽和高各缩小为原来的一半

1.static IplImage* downsample（ IplImage* img ）

2.{

3. IplImage* smaller = cvCreateImage（ cvSize（img->width / 2, img->height / 2）,

4. img->depth, img->nChannels ）;

5. cvResize（ img, smaller, CV_INTER_NN ）;

7. return smaller;

8.}

（3）构建高斯差分金字塔

输入参数：

不解释了参见上面的说明即可

实际上差分金字塔的构成是通过对相邻层的图像进行相减获得的

1.static IplImage*** build_dog_pyr（ IplImage*** gauss_pyr, int octvs, int intvls ）

2.{

3. IplImage*** dog_pyr;

4. int i, o;

6. dog_pyr = static_cast<IplImage***>（ calloc（ octvs, sizeof（ IplImage** ）））;

7. for（ i = 0; i < octvs; i++ ）

8. dog_pyr[i] = static_cast<IplImage**>（ calloc（ intvls + 2, sizeof（IplImage*）））;

10. for（ o = 0; o < octvs; o++ ）

11. for（ i = 0; i < intvls + 2; i++ ）

12. {

13. dog_pyr[o][i] = cvCreateImage（ cvGetSize（gauss_pyr[o][i]）,

14. IPL_DEPTH_32F, 1 ）;

15. cvSub（ gauss_pyr[o][i+1], gauss_pyr[o][i], dog_pyr[o][i], NULL ）;

16. }

17.

18. return dog_pyr;

19.}

（4）极值点检测

输入参数：

contr_thr是去除对比度低的点所采用的阈值

curv_thr是去除边缘特征的阈值

1.static CvSeq* scale_space_extrema（ IplImage*** dog_pyr, int octvs, int intvls,

2. double contr_thr, int curv_thr,

3. CvMemStorage* storage ）

4.{

5. CvSeq* features;

6. double prelim_contr_thr = 0.5 * contr_thr / intvls;

7. struct feature* feat;

8. struct detection_data* ddata;

9. int o, i, r, c;

10.

11. features = cvCreateSeq（ 0, sizeof（CvSeq）, sizeof（struct feature）, storage ）;

12. for（ o = 0; o < octvs; o++ ）

13. for（ i = 1; i <= intvls; i++ ）

14. for（r = SIFT_IMG_BORDER; r < dog_pyr[o][0]->height-SIFT_IMG_BORDER; r++）

15. for（c = SIFT_IMG_BORDER; c < dog_pyr[o][0]->width-SIFT_IMG_BORDER; c++）

16. /* perform preliminary check on contrast */

17. if（ ABS（ pixval32f（ dog_pyr[o][i], r, c ）） > prelim_contr_thr ）

18. if（ is_extremum（ dog_pyr, o, i, r, c ））

19. {

20. feat = interp_extremum（dog_pyr, o, i, r, c, intvls, contr_thr）;

21. if（ feat ）

22. {

23. ddata = feat_detection_data（ feat ）;

24. if（ !

is_too_edge_like（ dog_pyr[ddata->octv][ddata->intvl],

25. ddata->r, ddata->c, curv_thr ））

26. {

27. cvSeqPush（ features, feat ）;

28. }

29. else

30. free（ ddata ）;

31. free（ feat ）;

32. }

33. }

34.

35. return features;

36.}

SIFT_IMG_BORDER是预定义的图像边缘；

通过和对比度阈值比较去掉低对比度的点；

而通过is_extremum来判断是否为极值点，如果是则通过极值点插值的方式获取亚像素的极值点的位置。

然后通过is_too_edge_like和所给的主曲率阈值判断是否为边缘点

*判断是否为极值点

其原理为:

通过和高斯差分金字塔中上一层的9个像素+本层除了本像素自己之外的其他8个像素+下一层的9个像素进行比较，看该像素是否为这26个邻域像素中最小的一个或者最大的一个，如果是则为极值点。

1.static int is_extremum（ IplImage*** dog_pyr, int octv, int intvl, int r, int c

展开阅读全文