实验报告聚类分析.docx

上传人:b****8 文档编号:9362094 上传时间:2023-02-04 格式:DOCX 页数:26 大小:378.36KB
下载 相关 举报
实验报告聚类分析.docx_第1页
第1页 / 共26页
实验报告聚类分析.docx_第2页
第2页 / 共26页
实验报告聚类分析.docx_第3页
第3页 / 共26页
实验报告聚类分析.docx_第4页
第4页 / 共26页
实验报告聚类分析.docx_第5页
第5页 / 共26页
点击查看更多>>
下载资源
资源描述

实验报告聚类分析.docx

《实验报告聚类分析.docx》由会员分享,可在线阅读,更多相关《实验报告聚类分析.docx(26页珍藏版)》请在冰豆网上搜索。

实验报告聚类分析.docx

实验报告聚类分析

实验报告聚类分析

实验原理:

K均值聚类、中心点聚类、系统聚类和EM算法聚类分析技术。

实验题目:

用鸢尾花的数据集,进行聚类挖掘分析。

实验要求:

探索鸢尾花数据的基本特征,利用不同的聚类挖掘方法,获得基本结论并简明解释。

实验题目--分析报告:

data(iris)

>rm(list=ls())

>gc()

used(Mb)gctrigger(Mb)maxused(Mb)

Ncells431730929718607591

Vcells78760583886081592403

>data(iris)

>data<-iris

>head(data)

Species

1setosa

2setosa

3setosa

4setosa

5setosa

6setosa

#K-means聚类分析

>newiris<-iris

>newiris$Species<-NULL

>(kc<-kmeans(newiris,3))

K-meansclusteringwith3clustersofsizes62,50,38

Clustermeans:

1

2

3

Clusteringvector:

[1]2222222222222222222222222222222222222222

[41]2222222222113111111111111111111111111311

[81]1111111111111111111131333313333331133331

[121]313133113333313333133313331331

Withinclustersumofsquaresbycluster:

[1]

(between_SS/total_SS=%)

Availablecomponents:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"

[6] "betweenss"    "size"         "iter"         "ifault"

>table(iris$Species,kc$cluster)

              1  2  3

  setosa      0 50  0

  versicolor 48  0  2

  virginica  14  0 36

>plot(newiris[c("Sepal.Length","Sepal.Width")],col=kc$cluster)

>points(kc$centers[,c("Sepal.Length","Sepal.Width")],col=1:3,pch=8,cex=2)

#K-Medoids进行聚类分析

>install.packages("cluster")

>library(cluster)

>iris.pam<-pam(iris,3)

>table(iris$Species,iris.pam$clustering)

              1  2  3

  setosa     50  0  0

  versicolor  0  3 47

  virginica   0 49  1

>layout(matrix(c(1,2),1,2))

>plot(iris.pam)

>layout(matrix(1))

#hc

>iris.hc<-hclust(dist(iris[,1:4]))

>plot(iris.hc,hang=-1)

>plclust(iris.hc,labels=FALSE,hang=-1)

>re<-rect.hclust(iris.hc,k=3)

>iris.id<-cutree(iris.hc,3)

#利用剪枝函数cutree()参数h控制输出height=18时的系谱类别

>sapply(unique(iris.id),

+function(g)iris$Species[iris.id==g])

[[1]]

[1]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa

[12]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa

[23]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa

[34]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa

[45]setosasetosasetosasetosasetosasetosa

Levels:

setosaversicolorvirginica

[[2]]

[1]versicolorversicolorversicolorversicolorversicolorversicolorversicolor

[8]versicolorversicolorversicolorversicolorversicolorversicolorversicolor

[15]versicolorversicolorversicolorversicolorversicolorversicolorversicolor

[22]versicolorversicolorvirginicavirginicavirginicavirginicavirginica

[29]virginicavirginicavirginicavirginicavirginicavirginicavirginica

[36]virginicavirginicavirginicavirginicavirginicavirginicavirginica

[43]virginicavirginicavirginicavirginicavirginicavirginicavirginica

[50]virginicavirginicavirginicavirginicavirginicavirginicavirginica

[57]virginicavirginicavirginicavirginicavirginicavirginicavirginica

[64]virginicavirginicavirginicavirginicavirginicavirginicavirginica

[71]virginicavirginica

Levels:

setosaversicolorvirginica

[[3]]

[1]versicolorversicolorversicolorversicolorversicolorversicolorversicolor

[8]versicolorversicolorversicolorversicolorversicolorversicolorversicolor

[15]versicolorversicolorversicolorversicolorversicolorversicolorversicolor

[22]versicolorversicolorversicolorversicolorversicolorversicolorvirginica

Levels:

setosaversicolorvirginica

>plot(iris.hc)

>rect.hclust(iris.hc,k=4,border="lightgrey")#用浅灰色矩形框出4分类聚类结果

>rect.hclust(iris.hc,k=3,border="darkgrey")#用深灰色矩形框出3分类聚类结果

>rect.hclust(iris.hc,k=7,which=c(2,6),border="darkgrey")

#DBSCAN#基于密度的聚类

>install.packages("fpc")

>library(fpc)

>ds1=dbscan(iris[,1:

4],eps=1,MinPts=5)#半径参数为1,密度阈值为5

>ds1

dbscanPts=150MinPts=5eps=1

12

border01

seed5099

total50100

>ds2=dbscan(iris[,1:

4],eps=4,MinPts=5)

>ds3=dbscan(iris[,1:

4],eps=4,MinPts=2)

>ds4=dbscan(iris[,1:

4],eps=8,MinPts=2)

>par(mfcol=c(2,2))

>plot(ds1,iris[,1:

4],main="1:

MinPts=5eps=1")

>plot(ds3,iris[,1:

4],main="3:

MinPts=2eps=4")

>plot(ds2,iris[,1:

4],main="2:

MinPts=5eps=4")

>plot(ds4,iris[,1:

4],main="4:

MinPts=2eps=8")

>d=dist(iris[,1:

4])#计算数据集的距离矩阵d

>max(d);min(d)#计算数据集样本的距离的最值

[1]

[1]0

>install.packages("ggplot2")

>library(ggplot2)

>interval=cut_interval(d,30)

>table(interval)

interval

[0,],],],],],]

88585876891831688

],],],],],]

543369379339335406

],],],],],]

458459465480468505

],],],],],]

349385321291187138

],],],],],]

97927850184

>which.max(table(interval))

]

4

>for(i in 3:5)

+{for(j in 1:10)

+{ds=dbscan(iris[,1:4],eps=i,MinPts=j)

+print(ds)

+}

+}

dbscanPts=150MinPts=1eps=3

1

seed150

total150

dbscanPts=150MinPts=2eps=3

1

seed150

total150

dbscanPts=150MinPts=3eps=3

1

seed150

total150

dbscanPts=150MinPts=4eps=3

1

seed150

total150

dbscanPts=150MinPts=5eps=3

1

seed150

total150

dbscanPts=150MinPts=6eps=3

1

seed150

total150

dbscanPts=150MinPts=7eps=3

1

seed150

total150

dbscanPts=150MinPts=8eps=3

1

seed150

total150

dbscanPts=150MinPts=9eps=3

1

seed150

total150

dbscanPts=150MinPts=10eps=3

1

seed150

total150

dbscanPts=150MinPts=1eps=4

1

seed150

total150

dbscanPts=150MinPts=2eps=4

1

seed150

total150

dbscanPts=150MinPts=3eps=4

1

seed150

total150

dbscanPts=150MinPts=4eps=4

1

seed150

total150

dbscanPts=150MinPts=5eps=4

1

seed150

total150

dbscanPts=150MinPts=6eps=4

1

seed150

total150

dbscanPts=150MinPts=7eps=4

1

seed150

total150

dbscanPts=150MinPts=8eps=4

1

seed150

total150

dbscanPts=150MinPts=9eps=4

1

seed150

total150

dbscanPts=150MinPts=10eps=4

1

seed150

total150

dbscanPts=150MinPts=1eps=5

1

seed150

total150

dbscanPts=150MinPts=2eps=5

1

seed150

total150

dbscanPts=150MinPts=3eps=5

1

seed150

total150

dbscanPts=150MinPts=4eps=5

1

seed150

total150

dbscanPts=150MinPts=5eps=5

1

seed150

total150

dbscanPts=150MinPts=6eps=5

1

seed150

total150

dbscanPts=150MinPts=7eps=5

1

seed150

total150

dbscanPts=150MinPts=8eps=5

1

seed150

total150

dbscanPts=150MinPts=9eps=5

1

seed150

total150

dbscanPts=150MinPts=10eps=5

1

seed150

total150

#30次dbscan的聚类结果

>ds5=dbscan(iris[,1:

4],eps=3,MinPts=2)

>ds6=dbscan(iris[,1:

4],eps=4,MinPts=5)

>ds7=dbscan(iris[,1:

4],eps=5,MinPts=9)

>par(mfcol=c(1,3))

>plot(ds5,iris[,1:

4],main="1:

MinPts=2eps=3")

>plot(ds6,iris[,1:

4],main="3:

MinPts=5eps=4")

>plot(ds7,iris[,1:

4],main="2:

MinPts=9eps=5")

#EM期望最大化聚类

>install.packages("mclust")

>library(mclust)

>fit_EM=Mclust(iris[,1:

4])

fitting...

|===========================================================================|100%

>summary(fit_EM)

----------------------------------------------------

GaussianfinitemixturemodelfittedbyEMalgorithm

----------------------------------------------------

MclustVEV(ellipsoidal,equalshape)modelwith2components:

ndfBICICL

15026

Clusteringtable:

12

50100

>summary(fit_EM,parameters=TRUE)

----------------------------------------------------

GaussianfinitemixturemodelfittedbyEMalgorithm

----------------------------------------------------

MclustVEV(ellipsoidal,equalshape)modelwith2components:

ndfBICICL

15026

Clusteringtable:

12

50100

Mixingprobabilities:

12

Means:

[,1][,2]

Variances:

[,,1]

0.0.

0.0.

[,,2]

0.0.

0.

0.0.

0.

>plot(fit_EM)#对EM聚类结果作图

Model-basedclusteringplots:

1:

BIC

2:

classification

3:

uncertainty

4:

density

Selection:

(下面显示选项)

#选1

#选2

#选3

#选4

Selection:

0

>iris_BIC=mclustBIC(iris[,1:

4])

fitting...

|===========================================================================|100%

>iris_BICsum=summary(iris_BIC,data=iris[,1:

4])

>iris_BICsum#获取数据集iris在各模型和类别数下的BIC值

BestBICvalues:

VEV,2VEV,3VVV,2

BIC

BICdiff

Classificationtableformodel(VEV,2):

12

50100

>iris_BIC

BayesianInformationCriterion(BIC):

EIIVIIEEIVEIEVIVVIEEE

1

2

3

4

5

6

7

8

9

EVEVEEVVEEEVVEVEVVVVV

1

2

3

4

5NANA

6NA

7NA

8

9NA

Top3modelsbasedontheBICcriterion:

VEV,2VEV,3VVV,2

>par(mfcol=c(1,1))

>plot(iris_BIC,G=1:

7,col="yellow")

>mclust2Dplot(iris[,1:

2],

+classification=iris_BICsum$classification,

+parameters=iris_BICsum$parameters,col="yellow")

>iris_Dens=densityMclust(iris[,1:

2])#对每一个样本进行密度估计

fitting...

|===========================================================================|100%

>iris_Dens

'densityMclust'modelobject:

(VEV,2)

Availablecomponents:

[1]"call""data""modelName""n"

[5]"d""G""BIC""bic"

[9]"loglik""df""hypvol""parameters"

[13]"z""classification""uncertainty""density"

>plot(iris_Dens,iris[,1:

2],col="yellow",nlevels=55)##输入1或2

Model-baseddensityestimationplots:

1:

BIC

2:

density

Selection:

(下面显示选项)

#选1

#选2

Selection:

0

>plot(iris_Dens,type="persp",col=grey(0.8))

Model-baseddensityestimationplots:

1:

BIC

2:

density

Selection:

(下面显示选项)

#选1

#选2

Selection:

0

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 成人教育 > 成考

copyright@ 2008-2022 冰豆网网站版权所有

经营许可证编号:鄂ICP备2022015515号-1