k近邻法的C++实现kd树电脑资料.docx

资源描述

k近邻法的C++实现kd树电脑资料.docx

《k近邻法的C++实现kd树电脑资料.docx》由会员分享，可在线阅读，更多相关《k近邻法的C++实现kd树电脑资料.docx（16页珍藏版）》请在冰豆网上搜索。

k近邻法的C++实现kd树电脑资料.docx

k近邻法的C++实现kd树电脑资料

k近邻法的C++实现：

kd树-电脑资料

1.k近邻算法的思想

给定一个训练集，对于新的输入实例，在训练集中找到与该实例最近的k个实例，这k个实例中的多数属于某个类，就把该输入实例分为这个类。

因为要找到最近的k个实例，所以计算输入实例与训练集中实例之间的距离是关键！

k近邻算法最简单的方法是线性扫描，这时要计算输入实例与每一个训练实例的距离，当训练集很大时，非常耗时，这种方法不可行，为了提高k近邻的搜索效率，常常考虑使用特殊的存储结构存储训练数据，以减少计算距离的次数，具体方法很多，这里介绍实现经典的kd树方法。

2.构造kd树

kd树是一种对k维空间中的实例点进行存储以便对其进行快速检索的树形数据结构，kd树是二叉树。

下面举例说明：

给定一个二维空间的数据集:

T={（2,3）,（5,4）,（9,6）,（4,7）,（8,1）,（7,2）},构造一个平衡kd树。

根结点对应包含数据集T的矩形，选择$x^{(1)}$轴，6个数据点的$x^{(1)}$坐标排序后为2,4,5,7,8,9，中位数取7（个数为偶数时取排序后下标为6/2的那个值），以超平面$x^{(1)}=7$将空间分为左右两个子矩形（子结点）；

左矩形以$x^{(2)}=4$为中位数分为两个子矩形；

右矩形以$x^{(2)}=6$分为两个子矩形；

如此递归，直到两个子区域没有实例存在时停止

3.利用kd树搜索最近邻

输入：

已构造的kd树；目标点x;

输出：

x的最近邻

在kd树中找出包含目标点x的叶结点：

从根结点出发，递归的向下访问kd树，若目标点x的当前维的坐标小于切分点的坐标，则移动到左子结点，否则移动到右子结点，直到子结点为叶结点为止。

以此叶结点为“当前最近点”

递归地向上回退，在每个结点进行以下操作：

（a）如果该结点保存的实例点比当前最近点距离目标点更近，则以该实例点为“当前最近点”;

（b）当前最近点一定存在于某结点一个子结点对应的区域，检查该子结点的父结点的另一子结点对应区域是否有更近的点（即检查另一子结点对应的区域是否与以目标点为球心、以目标点与“当前最近点”间的距离为半径的球体相交）；如果相交，可能在另一个子结点对应的区域内存在距目标点更近的点，移动到另一个子结点，接着递归进行最近邻搜索；如果不相交，向上回退。

当回退到根结点时，搜索结束，最后的“当前最近点”即为x的最近邻点。

4.C++实现

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

using namespace std;

// One node of a kd-tree. Every node also acts as the root of the subtree that
// hangs below it, which is why the stored point is called `root`.
//
// Nodes are linked through raw pointers and this struct never frees its
// children; whoever allocates the nodes is responsible for releasing them.
struct KdTree {
    std::vector<double> root;  // point stored at this node; empty when the node holds no point
    KdTree* parent;            // node above this one, or nullptr
    KdTree* leftChild;         // left subtree, or nullptr
    KdTree* rightChild;        // right subtree, or nullptr

    // Creates an empty node that is not linked to any other node.
    KdTree() : parent(nullptr), leftChild(nullptr), rightChild(nullptr) {}

    // True when this node stores no point.
    bool isEmpty() const
    {
        return root.empty();
    }

    // True when this node stores a point and has neither a left nor a right child.
    bool isLeaf() const
    {
        return !root.empty() && leftChild == nullptr && rightChild == nullptr;
    }

    // True when this node stores a point and has no parent.
    bool isRoot() const
    {
        return !isEmpty() && parent == nullptr;
    }

    // True when this node is the left child of its parent.
    // The test is by node identity rather than by comparing the stored points,
    // so siblings that hold equal (or both empty) points are told apart, and a
    // node without a parent simply yields false instead of dereferencing null.
    bool isLeft() const
    {
        return parent != nullptr && parent->leftChild == this;
    }

    // True when this node is the right child of its parent (identity test, see isLeft).
    bool isRight() const
    {
        return parent != nullptr && parent->rightChild == this;
    }
};

// Sample training set: six points with two coordinates each, one point per row.
int data[6][2] = {{2, 3}, {5, 4}, {9, 6}, {4, 7}, {8, 1}, {7, 2}};

// Returns the transpose of a row-major matrix stored as a vector of rows.
//
// Matrix: input rows. Every row is expected to be at least as long as the first
//         row; the column count is taken from row 0.
// Returns a matrix with one row per input column. An empty input yields an
// empty result (the first row is only inspected when there is one).
template <typename T>
std::vector<std::vector<T> > Transpose(const std::vector<std::vector<T> >& Matrix)
{
    if (Matrix.empty())
        return std::vector<std::vector<T> >();

    const std::size_t rowCount = Matrix.size();
    const std::size_t colCount = Matrix[0].size();

    // Pre-size the result so elements can be written by index.
    std::vector<std::vector<T> > Trans(colCount, std::vector<T>(rowCount));
    for (std::size_t c = 0; c < colCount; ++c)
    {
        for (std::size_t r = 0; r < rowCount; ++r)
        {
            Trans[c][r] = Matrix[r][c];
        }
    }
    return Trans;
}

// Returns the element that would sit at index size()/2 if `vec` were sorted
// ascending (for an even number of elements this is the upper of the two
// middle values).
//
// vec: values to inspect; taken by value because it is reordered internally.
// Throws std::invalid_argument when `vec` is empty, since there is no middle
// element to return.
template <typename T>
T findMiddleValue(std::vector<T> vec)
{
    if (vec.empty())
        throw std::invalid_argument("findMiddleValue: empty input");

    // Only the middle position has to be in its sorted place, so a partial
    // selection is enough; the result equals sort-then-index.
    typename std::vector<T>::iterator middle = vec.begin() + vec.size() / 2;
    std::nth_element(vec.begin(), middle, vec.end());
    return *middle;
}

73//构建kd树

74voidbuildKdTree（KdTree*tree,vector>data,unsigneddepth）

75{

77//样本的数量

78unsignedsamplesNum=data.size（）;

79//终止条件

80if（samplesNum==0）

81{

82return;

83}

84if（samplesNum==1）

85{

86tree->root=data[0];

87return;

88}

89//样本的维度

90unsignedk=data[0].size（）;

91vector>transData=Transpose（data）;

92//选择切分属性

93unsignedsplitAttribute=depth%k;

94vectorsplitAttributeValues=transData[splitAttribute];

95//选择切分值

96doublesplitValue=findMiddleValue（splitAttributeValues）;

97//cout<<"splitValue"<

99//根据选定的切分属性和切分值，将数据集分为两个子集

100vector>subset1;

101vector>subset2;

102for（unsignedi=0;i

103{

104if（splitAttributeValues[i]==splitValue&&tree->root.empty（））

105tree->root=data[i];

106else

107{

108if（splitAttributeValues[i]

109subset1.push_back（data[i]）;

110else

111subset2.push_back（data[i]）;

112}

113}

114

115//子集递归调用buildKdTree函数

116

117tree->leftChild=newKdTree;

118tree->leftChild->parent=tree;

119tree->rightChild=newKdTree;

120tree->rightChild->parent=tree;

121buildKdTree（tree->leftChild,subset1,depth+1）;

122buildKdTree（tree->rightChild,subset2,depth+1）;

123}

124

125//逐层打印kd树

126voidprintKdTree（KdTree*tree,unsigneddepth）

127{

128for（unsignedi=0;i

129cout<<"\t";

130

131for（vector:

size_typej=0;jroot.size（）;++j）

132cout<root[j]<<",";

133cout<

134if（tree->leftChild==NULL&&tree->rightChild==NULL）//叶子节点

135return;

136else//非叶子节点

137{

138if（tree->leftChild!

=NULL）

139{

140for（unsignedi=0;i

141cout<<"\t";

142cout<<"left:

143printKdTree（tree->leftChild,depth+1）;

144}

145

146cout<

147if（tree->rightChild!

=NULL）

148{

149for（unsignedi=0;i

150cout<<"\t";

151cout<<"right:

152printKdTree（tree->rightChild,depth+1）;

153}

154cout<

155}

156}

157

158

// Computes the distance between two points.
//
// point1, point2: coordinates of the two points; they must have the same
//                 number of coordinates.
// method:         0 = Euclidean distance, 1 = Manhattan distance.
//
// Returns the distance. For any other `method` a message is written to
// standard error and -1 is returned.
// When the dimensions differ, a message is written to standard error and the
// process is terminated with exit status 1.
double measureDistance(const std::vector<double>& point1,
                       const std::vector<double>& point2,
                       unsigned method)
{
    if (point1.size() != point2.size())
    {
        std::cerr << "Dimensions don't match！！";
        std::exit(1);
    }

    switch (method)
    {
        case 0:  // Euclidean distance
        {
            double res = 0;
            for (std::vector<double>::size_type i = 0; i < point1.size(); ++i)
            {
                const double delta = point1[i] - point2[i];
                res += delta * delta;
            }
            return std::sqrt(res);
        }
        case 1:  // Manhattan distance
        {
            double res = 0;
            for (std::vector<double>::size_type i = 0; i < point1.size(); ++i)
            {
                // std::fabs keeps the computation in floating point; an
                // unqualified abs may bind to the integer overload and truncate.
                res += std::fabs(point1[i] - point2[i]);
            }
            return res;
        }
        default:
        {
            std::cerr << "Invalid method!" << std::endl;
            return -1;
        }
    }
}

194//在kd树tree中搜索目标点goal的最近邻

195//输入：

目标点；已构造的kd树

196//输出：

目标点的最近邻

197vectorsearchNearestNeighbor（vectorgoal,KdTree*tree）

198{

199/*第一步：

在kd树中找出包含目标点的叶子结点：

从根结点出发，

200递归的向下访问kd树，若目标点的当前维的坐标小于切分点的

201坐标，则移动到左子结点，否则移动到右子结点，直到子结点为

202叶结点为止,以此叶子结点为“当前最近点”

203*/

204unsignedk=tree->root.size（）;//计算出数据的维数

205unsignedd=0;//维度初始化为0，即从第1维开始

206KdTree*currentTree=tree;

207vectorcurrentNearest=currentTree->root;

208while（!

currentTree->isLeaf（））

209{

210unsignedindex=d%k;//计算当前维

211if（currentTree->rightChild->isEmpty（）||goal[index]

212{

213currentTree=currentTree->leftChild;

214}

215else

216{

217currentTree=currentTree->rightChild;

218}

219++d;

220}

221currentNearest=currentTree->root;

222

223/*第二步：

递归地向上回退，在每个结点进行如下操作：

224（a）如果该结点保存的实例比当前最近点距离目标点更近，则以该例点为“当前最近点”

225（b）当前最近点一定存在于某结点一个子结点对应的区域，检查该子结点的父结点的另

226一子结点对应区域是否有更近的点（即检查另一子结点对应的区域是否与以目标点为球

227心、以目标点与“当前最近点”间的距离为半径的球体相交）；如果相交，可能在另一

228个子结点对应的区域内存在距目标点更近的点，移动到另一个子结点，接着递归进行最

229近邻搜索；如果不相交，向上回退*/

230

231//当前最近邻与目标点的距离

232doublecurrentDistance=measureDistance（goal,currentNearest,0）;

233

234//如果当前子kd树的根结点是其父结点的左孩子，则搜索其父结点的右孩子结点所代表

235//的区域，反之亦反

236KdTree*searchDistrict;

237if（currentTree->isLeft（））

238{

239if（currentTree->parent->rightChild==NULL）

240searchDistrict=currentTree;

241else

242searchDistrict=currentTree->parent->rightChild;

243}

244else

245{

246searchDistrict=currentTree->parent->leftChild;

247}

248

249//如果搜索区域对应的子kd树的根结点不是整个kd树的根结点，继续回退搜索

250while（searchDistrict->parent!

=NULL）

251{

252//搜索区域与目标点的最近距离

253doubledistrictDistance=abs（goal[（d+1）%k]-searchDistrict->parent->root[（d+1）%k]）;

254

255//如果“搜索区域与目标点的最近距离”比“当前最近邻与目标点的距离”短，表明搜索

256//区域内可能存在距离目标点更近的点

257if（districtDistance

searchDistrict->isEmpty（）

258{

259

260doubleparentDistance=measureDistance（goal,searchDistrict->parent->root,0）;

261

262if（parentDistance

263{

264currentDistance=parentDistance;

265currentTree=searchDistrict->parent;

266currentNearest=currentTree->root;

267}

268if（!

searchDistrict->isEmpty（））

269{

270doublerootDistance=measureDistance（goal,searchDistrict->root,0）;

271if（rootDistance

272{

273currentDistance=rootDistance;

274currentTree=searchDistrict;

275currentNearest=currentTree->root;

276}

277}

278if（searchDistrict->leftChild!

=NULL）

279{

280doubleleftDistance=measureDistance（goal,searchDistrict->leftChild->root,0）;

281if（leftDistance

282{

283currentDistance=leftDistance;

284currentTree=searchDistrict;

285currentNearest=currentTree->root;

286}

287}

288if（searchDistrict->rightChild!

=NULL）

289{

290doublerightDistance=measureDistance（goal,searchDistrict->rightChild->root,0）;

291if（rightDistance

292{

293currentDistance=rightDistance;

294currentTree=searchDistrict;

295currentNearest=currentTree->root;

296}

297}

298}//endif

299

300if（searchDistrict->parent->parent!

=NULL）

301{

302searchDistrict=searchDistrict->parent->isLeft（）?

303searchDistrict->parent->parent->rightChild:

304searchDistrict->parent->parent->leftChild;

305}

306else

307{

308searchDistrict=searchDistrict->parent;

309}

310++d;

311}//endwhile

312returncurrentNearest;

313}

314

315intmain（）

316{

317vector>train（6,vector（2,0））;

318for（unsignedi=0;i<6;++i）

319for（unsignedj=0;j<2;++j）

320train[i][j]=data[i][j];

321

322KdTree*kdTree=newKdTree;

323buildKdTree（kdTree,train,0）;

324

325printKdTree（kdTree,0）;

326

327vectorgoal;

328goal.push_back（3）;

329goal.push_back（4.5）;

330vectornearestNeighbor=searchNearestNeighbor（goal,kdTree）;

331vector:

iteratorbeg=nearestNeighbor.begin（）;

332cout<<"Thenearestneighboris:

333while（beg!

=nearestNeighbor.end（））cout<<*beg++<<",";

334cout<

335return0;

336}

展开阅读全文