机器学习实验报告DOC.docx

上传人:b****4 文档编号:4380985 上传时间:2022-12-01 格式:DOCX 页数:23 大小:104.52KB
下载 相关 举报
机器学习实验报告DOC.docx_第1页
第1页 / 共23页
机器学习实验报告DOC.docx_第2页
第2页 / 共23页
机器学习实验报告DOC.docx_第3页
第3页 / 共23页
机器学习实验报告DOC.docx_第4页
第4页 / 共23页
机器学习实验报告DOC.docx_第5页
第5页 / 共23页
点击查看更多>>
下载资源
资源描述

机器学习实验报告DOC.docx

《机器学习实验报告DOC.docx》由会员分享,可在线阅读,更多相关《机器学习实验报告DOC.docx(23页珍藏版)》请在冰豆网上搜索。

机器学习实验报告DOC.docx

机器学习实验报告DOC

 

《机器学习》

课内实验报告

(1)ID3算法实现决策树

2015-2016学年第2学期

 

专业:

智能科学与技术

班级:

智能1301班

学号:

06133029

姓名:

张争辉

 

一、实验目的:

理解ID3算法的基本原理,并且编程实现。

二、实验要求:

使用C/C++/MATLAB实现ID3算法。

输入:

若干行,每行5个字符串,表示

Outlook  Temperature  Humidity  Wind  PlayBall

如上表。

输出:

决策树。

实验结果如下:

输入:

Sunny Hot High Weak No

Sunny Hot High Strong No

Overcast Hot High Weak Yes

Rain Mild High Weak Yes

Rain Cool Normal Weak Yes

Rain Cool Normal Strong No

Overcast Cool Normal Strong Yes

Sunny Mild High Weak No

Sunny Cool Normal Weak Yes

Rain Mild Normal Weak Yes

Sunny Mild Normal Strong Yes

Overcast Mild High Strong Yes

Overcast Hot Normal Weak Yes

Rain Mild High Strong No

输出:

Outlook

    Rain: Wind

        Strong: No

        Weak: Yes

    Overcast: Yes

    Sunny: Humidity

        Normal: Yes

        High: No

三、具体实现:

实现算法如下:

// NOTE(review): the header names were stripped from the four #include lines of
// the extracted text. The headers below are the ones the visible code needs
// (cout/endl, strcpy/strcmp, log, free) - confirm against the original file.
#include <iostream>
#include <cstring>
#include <cmath>
#include <cstdlib>

using namespace std;

#define ROW 14 // number of training examples in Examples
#define COL 5  // strings per example: four attribute values plus the class label

// Natural logarithm of 2; log(x) / log2 yields the base-2 logarithm of x.
// The macro name shadows the standard log2() function, so it must stay below
// every #include line.
#define log2 0.69314718055

// Decision-tree node, stored in first-child / next-sibling form.
typedef struct TNode
{
    char data[15];   // attribute name tested at this node, or the "Yes"/"No" label
    char weight[15]; // attribute value on the branch leading to this node ("" at the root)
    TNode *firstchild, *nextsibling;
} *tree;

// One training example, kept as a node of a singly linked list.
// NOTE(review): the typedef name `link` can clash with the POSIX link()
// function if <unistd.h> is ever pulled in - confirm on the target toolchain.
typedef struct LNode
{
    char OutLook[15];
    char Temperature[15];
    char Humidity[15];
    char Wind[15];
    char PlayTennis[5]; // class label: "Yes" or "No"
    LNode *next;
} *link;

// One candidate attribute, kept as a node of a singly linked list.
typedef struct AttrNode
{
    char attributes[15]; // attribute name
    int attr_Num;        // number of distinct values the attribute can take
    AttrNode *next;
} *Attributes;

// Training set: one row per example, columns are
// OutLook, Temperature, Humidity, Wind, PlayTennis.
// Row 12 (OverCast, Mild, ..., Strong, Yes) has Humidity "High", matching the
// input listed in the report; the earlier "Normal" was a transcription error.
char *Examples[ROW][COL] = {
    {"Sunny",    "Hot",  "High",   "Weak",   "No"},
    {"Sunny",    "Hot",  "High",   "Strong", "No"},
    {"OverCast", "Hot",  "High",   "Weak",   "Yes"},
    {"Rain",     "Mild", "High",   "Weak",   "Yes"},
    {"Rain",     "Cool", "Normal", "Weak",   "Yes"},
    {"Rain",     "Cool", "Normal", "Strong", "No"},
    {"OverCast", "Cool", "Normal", "Strong", "Yes"},
    {"Sunny",    "Mild", "High",   "Weak",   "No"},
    {"Sunny",    "Cool", "Normal", "Weak",   "Yes"},
    {"Rain",     "Mild", "Normal", "Weak",   "Yes"},
    {"Sunny",    "Mild", "Normal", "Strong", "Yes"},
    {"OverCast", "Mild", "High",   "Strong", "Yes"},
    {"OverCast", "Hot",  "Normal", "Weak",   "Yes"},
    {"Rain",     "Mild", "High",   "Strong", "No"}
};

// Attribute names, in the same order as the first four columns of Examples.
char *Attributes_kind[4] = {"OutLook", "Temperature", "Humidity", "Wind"};

// Number of distinct values per attribute, parallel to Attributes_kind.
int Attr_kind[4] = {3, 3, 2, 2};

// Possible values of each attribute.
char *OutLook_kind[3] = {"Sunny", "OverCast", "Rain"};
char *Temperature_kind[3] = {"Hot", "Mild", "Cool"};
char *Humidity_kind[2] = {"High", "Normal"};
char *Wind_kind[2] = {"Weak", "Strong"};

/*inti_Exampple[14][5]={0,0,0,0,1,

0,0,0,1,1,

1,0,0,1,0,

2,1,0,0,0,

2,2,1,0,0,

2,2,1,1,1,

1,2,1,1,0,

0,1,0,0,1,

0,2,1,0,0,

2,1,1,0,0,

0,1,1,1,0,

1,1,1,1,0,

1,1,1,0,0,

2,1,0,0,1

};*/

voidtreelists(treeT);

voidInitAttr(Attributes&attr_link,char*Attributes_kind[],intAttr_kind[]);

voidInitLink(link&L,char*Examples[][COL]);

voidID3(tree&T,linkL,linkTarget_Attr,Attributesattr);

voidPN_Num(linkL,int&positve,int&negative);

doubleGain(intpositive,intnegative,char*atrribute,linkL,Attributesattr_L);

voidmain()

{

linkLL,p;

Attributesattr_L,q;

treeT;

T=newTNode;

T->firstchild=T->nextsibling=NULL;

strcpy(T->weight,"");

strcpy(T->data,"");

attr_L=newAttrNode;

attr_L->next=NULL;

LL=newLNode;

LL->next=NULL;

//成功建立两个链表

InitLink(LL,Examples);

InitAttr(attr_L,Attributes_kind,Attr_kind);

ID3(T,LL,NULL,attr_L);

cout<<"决策树以广义表形式输出如下:

"<

treelists(T);//以广义表的形式输出树

//cout<

cout<

}

//以广义表的形式输出树

voidtreelists(treeT)

{

treep;

if(!

T)

return;

cout<<"{"<weight<<"}";

cout<data;

p=T->firstchild;

if(p)

{

cout<<"(";

while(p)

{

treelists(p);

p=p->nextsibling;

if(p)cout<<',';

}

cout<<")";

}

}

voidInitAttr(Attributes&attr_link,char*Attributes_kind[],intAttr_kind[])

{

Attributesp;

for(inti=0;i<4;i++)

{

p=newAttrNode;

p->next=NULL;

strcpy(p->attributes,Attributes_kind[i]);

p->attr_Num=Attr_kind[i];

p->next=attr_link->next;

attr_link->next=p;

}

}

voidInitLink(link&LL,char*Examples[][COL])

{

linkp;

for(inti=0;i

{

p=newLNode;

p->next=NULL;

strcpy(p->OutLook,Examples[i][0]);

strcpy(p->Temperature,Examples[i][1]);

strcpy(p->Humidity,Examples[i][2]);

strcpy(p->Wind,Examples[i][3]);

strcpy(p->PlayTennis,Examples[i][4]);

p->next=LL->next;

LL->next=p;

}

}

voidPN_Num(linkL,int&positve,int&negative)

{

positve=0;

negative=0;

linkp;

p=L->next;

while(p)

{

if(strcmp(p->PlayTennis,"No")==0)

negative++;

elseif(strcmp(p->PlayTennis,"Yes")==0)

positve++;

p=p->next;

}

}

//计算信息增益

//linkL:

样本集合S

//attr_L:

属性集合

doubleGain(intpositive,intnegative,char*atrribute,linkL,Attributesattr_L)

{

intatrr_kinds;//每个属性中的值的个数

Attributesp=attr_L->next;

linkq=L->next;

intattr_th=0;//第几个属性

while(p)

{

if(strcmp(p->attributes,atrribute)==0)

{

atrr_kinds=p->attr_Num;

break;

}

p=p->next;

attr_th++;

}

doubleentropy,gain=0;

doublep1=1.0*positive/(positive+negative);

doublep2=1.0*negative/(positive+negative);

entropy=-p1*log(p1)/log2-p2*log(p2)/log2;//集合熵

gain=entropy;

//获取每个属性值在训练样本中出现的个数

//获取每个属性值所对应的正例和反例的个数

//声明一个3*atrr_kinds的数组

int**kinds=newint*[3];

for(intj=0;j<3;j++)

{

kinds[j]=newint[atrr_kinds];//保存每个属性值在训练样本中出现的个数

}

//初始化

for(intj=0;j<3;j++)

{

for(inti=0;i

{

kinds[j][i]=0;

}

}

while(q)

{

if(strcmp("OutLook",atrribute)==0)

{

for(inti=0;i

{

if(strcmp(q->OutLook,OutLook_kind[i])==0)

{

kinds[0][i]++;

if(strcmp(q->PlayTennis,"Yes")==0)

kinds[1][i]++;

else

kinds[2][i]++;

}

}

}

elseif(strcmp("Temperature",atrribute)==0)

{

for(inti=0;i

{

if(strcmp(q->Temperature,Temperature_kind[i])==0)

{

kinds[0][i]++;

if(strcmp(q->PlayTennis,"Yes")==0)

kinds[1][i]++;

else

kinds[2][i]++;

}

}

}

elseif(strcmp("Humidity",atrribute)==0)

{

for(inti=0;i

{

if(strcmp(q->Humidity,Humidity_kind[i])==0)

{

kinds[0][i]++;

if(strcmp(q->PlayTennis,"Yes")==0)

kinds[1][i]++;//

else

kinds[2][i]++;

}

}

}

elseif(strcmp("Wind",atrribute)==0)

{

for(inti=0;i

{

if(strcmp(q->Wind,Wind_kind[i])==0)

{

kinds[0][i]++;

if(strcmp(q->PlayTennis,"Yes")==0)

kinds[1][i]++;

else

kinds[2][i]++;

}

}

}

q=q->next;

}

//计算信息增益

double*gain_kind=newdouble[atrr_kinds];

intpositive_kind=0,negative_kind=0;

for(intj=0;j

{

if(kinds[0][j]!

=0&&kinds[1][j]!

=0&&kinds[2][j]!

=0)

{

p1=1.0*kinds[1][j]/kinds[0][j];

p2=1.0*kinds[2][j]/kinds[0][j];

gain_kind[j]=-p1*log(p1)/log2-p2*log(p2)/log2;

gain=gain-(1.0*kinds[0][j]/(positive+negative))*gain_kind[j];

}

else

gain_kind[j]=0;

}

returngain;

}

//在ID3算法中的训练样本子集合与属性子集合的链表需要进行清空

voidFreeLink(link&Link)

{

linkp,q;

p=Link->next;

Link->next=NULL;

while(p)

{

q=p;

p=p->next;

free(q);

}

}

voidID3(tree&T,linkL,linkTarget_Attr,Attributesattr)

{

Attributesp,max,attr_child,p1;

linkq,link_child,q1;

treer,tree_p;

intpositive=0,negative=0;

PN_Num(L,positive,negative);

//初始化两个子集合

attr_child=newAttrNode;

attr_child->next=NULL;

link_child=newLNode;

link_child->next=NULL;

if(positive==0)//全是反例

{

strcpy(T->data,"No");

return;

}

elseif(negative==0)//全是正例

{

strcpy(T->data,"Yes");

return;

}

p=attr->next;//属性链表

doublegain,g=0;

/************************************************************************/

/*建立属性子集合与训练样本子集合有两个方案:

一:

在原来链表的基础上进行删除;

二:

另外申请空间进行存储子集合;

采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间的应用混乱

*/

/************************************************************************/

if(p)

{

while(p)

{

gain=Gain(positive,negative,p->attributes,L,attr);

cout<attributes<<""<

if(gain>g)

{

g=gain;

max=p;//寻找信息增益最大的属性

}

p=p->next;

}

strcpy(T->data,max->attributes);//增加决策树的节点

cout<<"信息增益最大的属性:

max->attributes="<attributes<

//下面开始建立决策树

//创建属性子集合

p=attr->next;

while(p)

{

if(strcmp(p->attributes,max->attributes)!

=0)

{

p1=newAttrNode;

strcpy(p1->attributes,p->attributes);

p1->attr_Num=p->attr_Num;

p1->next=NULL;

p1->next=attr_child->next;

attr_child->next=p1;

}

p=p->next;

}

//需要区分出是哪一种属性

//建立每一层的第一个节点

if(strcmp("OutLook",max->attributes)==0)

{

r=newTNode;

r->firstchild=r->nextsibling=NULL;

strcpy(r->weight,OutLook_kind[0]);

T->firstchild=r;

//获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child

q=L->next;

while(q)

{

if(strcmp(q->OutLook,OutLook_kind[0])==0)

{

q1=newLNode;

strcpy(q1->OutLook,q->OutLook);

strcpy(q1->Humidity,q->Humidity);

strcpy(q1->Temperature,q->Temperature);

strcpy(q1->Wind,q->Wind);

strcpy(q1->PlayTennis,q->PlayTennis);

q1->next=NULL;

q1->next=link_child->next;

link_child->next=q1;

}

q=q->next;

}

}

elseif(strcmp("Temperature",max->attributes)==0)

{

r=newTNode;

r->firstchild=r->nextsibling=NULL;

strcpy(r->weight,Temperature_kind[0]);

T->firstchild=r;

//获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child

q=L->next;

while(q)

{

if(strcmp(q->Temperature,Temperature_kind[0])==0)

{

q1=newLNode;

strcpy(q1->OutLook,q->OutLook);

strcpy(q1->Humidity,q->Humidity);

strcpy(q1->Temperature,q->Temperature);

strcpy(q1->Wind,q->Wind);

strcpy(q1->PlayTennis,q->PlayTennis);

q1->next=NULL;

q1->next=link_child->next;

link_child->next=q1;

}

q=q->next;

}

}

elseif(strcmp("Humidity",max->attributes)==0)

{

r=newTNode;

r->firstchild=r->nextsibling=NULL;

strcpy(r->weight,Humidity_kind[0]);

T->firstchild=r;

//获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 医药卫生 > 临床医学

copyright@ 2008-2022 冰豆网网站版权所有

经营许可证编号:鄂ICP备2022015515号-1