机器学习实验报告DOC.docx
《机器学习实验报告DOC.docx》由会员分享,可在线阅读,更多相关《机器学习实验报告DOC.docx(23页珍藏版)》请在冰豆网上搜索。
机器学习实验报告DOC
《机器学习》
课内实验报告
(1)ID3算法实现决策树
2015-2016学年第2学期
专业:
智能科学与技术
班级:
智能1301班
学号:
06133029
姓名:
张争辉
一、实验目的:
理解ID3算法的基本原理,并且编程实现。
二、实验要求:
使用C/C++/MATLAB实现ID3算法。
输入:
若干行,每行5个字符串,表示
Outlook Temperature Humidity Wind Playball
如上表。
输出:
决策树。
实验结果如下:
输入:
Sunny Hot High Weak No
Sunny Hot High Strong No
Overcast Hot High Weak Yes
Rain Mild High Weak Yes
Rain Cool Normal Weak Yes
Rain Cool Normal Strong No
Overcast Cool Normal Strong Yes
Sunny Mild High Weak No
Sunny Cool Normal Weak Yes
Rain Mild Normal Weak Yes
Sunny Mild Normal Strong Yes
Overcast Mild High Strong Yes
Overcast Hot Normal Weak Yes
Rain Mild High Strong No
输出:
Outlook
    Rain → Wind
        Strong → No
        Weak → Yes
    Overcast → Yes
    Sunny → Humidity
        Normal → Yes
        High → No
三、具体实现:
实现算法如下:
#include <iostream>
#include <cstring>
#include <cmath>
#include <cstdlib>
using namespace std;
// Dimensions of the built-in training table: ROW samples, each with COL
// columns (four attribute values followed by the class label).
#define ROW 14
#define COL 5
// Natural logarithm of 2; dividing log(x) by it gives a base-2 logarithm.
// NOTE(review): this macro shares its name with the C99/C++11 library
// function log2(), so it must stay after every #include line.
#define log2 0.69314718055
// Decision-tree node in first-child / next-sibling representation.
typedef struct TNode
{
    char data[15];   // attribute name at an inner node, or "Yes"/"No" at a leaf
    char weight[15]; // attribute value labelling the branch from the parent ("" at the root)
    struct TNode *firstchild, *nextsibling;
} *tree;
// One training sample, stored as a node of a singly linked list.
// Sample lists are reached through a head node whose `next` points at the
// first real sample; the head node's own fields are not used.
// NOTE(review): the typedef name `link` clashes with POSIX link() if
// <unistd.h> is ever included.
typedef struct LNode
{
    char OutLook[15];
    char Temperature[15];
    char Humidity[15];
    char Wind[15];
    char PlayTennis[5]; // class label: "Yes" or "No"
    struct LNode *next;
} *link;
// One candidate attribute, stored as a node of a singly linked list
// (reached through a head node, like the sample list).
typedef struct AttrNode
{
    char attributes[15]; // attribute name
    int attr_Num;        // number of distinct values this attribute can take
    struct AttrNode *next;
} *Attributes;
// Built-in training samples. Columns, in order: OutLook, Temperature,
// Humidity, Wind, class label ("Yes"/"No").
// The entries point at string literals and must never be written through.
char *Examples[ROW][COL] = {
    {"Sunny",    "Hot",  "High",   "Weak",   "No"},
    {"Sunny",    "Hot",  "High",   "Strong", "No"},
    {"OverCast", "Hot",  "High",   "Weak",   "Yes"},
    {"Rain",     "Mild", "High",   "Weak",   "Yes"},
    {"Rain",     "Cool", "Normal", "Weak",   "Yes"},
    {"Rain",     "Cool", "Normal", "Strong", "No"},
    {"OverCast", "Cool", "Normal", "Strong", "Yes"},
    {"Sunny",    "Mild", "High",   "Weak",   "No"},
    {"Sunny",    "Cool", "Normal", "Weak",   "Yes"},
    {"Rain",     "Mild", "Normal", "Weak",   "Yes"},
    {"Sunny",    "Mild", "Normal", "Strong", "Yes"},
    {"OverCast", "Mild", "Normal", "Strong", "Yes"},
    {"OverCast", "Hot",  "Normal", "Weak",   "Yes"},
    {"Rain",     "Mild", "High",   "Strong", "No"}
};
// Names of the four attributes, in the same order as the columns of the
// sample table.
char *Attributes_kind[4] = {"OutLook", "Temperature", "Humidity", "Wind"};
// Attr_kind[i] is the number of distinct values of Attributes_kind[i]; it
// must match the length of the corresponding *_kind table below.
int Attr_kind[4] = {3, 3, 2, 2};
// Possible values of each attribute.
char *OutLook_kind[3] = {"Sunny", "OverCast", "Rain"};
char *Temperature_kind[3] = {"Hot", "Mild", "Cool"};
char *Humidity_kind[2] = {"High", "Normal"};
char *Wind_kind[2] = {"Weak", "Strong"};
/*inti_Exampple[14][5]={0,0,0,0,1,
0,0,0,1,1,
1,0,0,1,0,
2,1,0,0,0,
2,2,1,0,0,
2,2,1,1,1,
1,2,1,1,0,
0,1,0,0,1,
0,2,1,0,0,
2,1,1,0,0,
0,1,1,1,0,
1,1,1,1,0,
1,1,1,0,0,
2,1,0,0,1
};*/
voidtreelists(treeT);
voidInitAttr(Attributes&attr_link,char*Attributes_kind[],intAttr_kind[]);
voidInitLink(link&L,char*Examples[][COL]);
voidID3(tree&T,linkL,linkTarget_Attr,Attributesattr);
voidPN_Num(linkL,int&positve,int&negative);
doubleGain(intpositive,intnegative,char*atrribute,linkL,Attributesattr_L);
voidmain()
{
linkLL,p;
Attributesattr_L,q;
treeT;
T=newTNode;
T->firstchild=T->nextsibling=NULL;
strcpy(T->weight,"");
strcpy(T->data,"");
attr_L=newAttrNode;
attr_L->next=NULL;
LL=newLNode;
LL->next=NULL;
//成功建立两个链表
InitLink(LL,Examples);
InitAttr(attr_L,Attributes_kind,Attr_kind);
ID3(T,LL,NULL,attr_L);
cout<<"决策树以广义表形式输出如下:
"<treelists(T);//以广义表的形式输出树
//cout<cout<}
//以广义表的形式输出树
voidtreelists(treeT)
{
treep;
if(!
T)
return;
cout<<"{"<weight<<"}";
cout<data;
p=T->firstchild;
if(p)
{
cout<<"(";
while(p)
{
treelists(p);
p=p->nextsibling;
if(p)cout<<',';
}
cout<<")";
}
}
voidInitAttr(Attributes&attr_link,char*Attributes_kind[],intAttr_kind[])
{
Attributesp;
for(inti=0;i<4;i++)
{
p=newAttrNode;
p->next=NULL;
strcpy(p->attributes,Attributes_kind[i]);
p->attr_Num=Attr_kind[i];
p->next=attr_link->next;
attr_link->next=p;
}
}
voidInitLink(link&LL,char*Examples[][COL])
{
linkp;
for(inti=0;i{
p=newLNode;
p->next=NULL;
strcpy(p->OutLook,Examples[i][0]);
strcpy(p->Temperature,Examples[i][1]);
strcpy(p->Humidity,Examples[i][2]);
strcpy(p->Wind,Examples[i][3]);
strcpy(p->PlayTennis,Examples[i][4]);
p->next=LL->next;
LL->next=p;
}
}
voidPN_Num(linkL,int&positve,int&negative)
{
positve=0;
negative=0;
linkp;
p=L->next;
while(p)
{
if(strcmp(p->PlayTennis,"No")==0)
negative++;
elseif(strcmp(p->PlayTennis,"Yes")==0)
positve++;
p=p->next;
}
}
//计算信息增益
//linkL:
样本集合S
//attr_L:
属性集合
doubleGain(intpositive,intnegative,char*atrribute,linkL,Attributesattr_L)
{
intatrr_kinds;//每个属性中的值的个数
Attributesp=attr_L->next;
linkq=L->next;
intattr_th=0;//第几个属性
while(p)
{
if(strcmp(p->attributes,atrribute)==0)
{
atrr_kinds=p->attr_Num;
break;
}
p=p->next;
attr_th++;
}
doubleentropy,gain=0;
doublep1=1.0*positive/(positive+negative);
doublep2=1.0*negative/(positive+negative);
entropy=-p1*log(p1)/log2-p2*log(p2)/log2;//集合熵
gain=entropy;
//获取每个属性值在训练样本中出现的个数
//获取每个属性值所对应的正例和反例的个数
//声明一个3*atrr_kinds的数组
int**kinds=newint*[3];
for(intj=0;j<3;j++)
{
kinds[j]=newint[atrr_kinds];//保存每个属性值在训练样本中出现的个数
}
//初始化
for(intj=0;j<3;j++)
{
for(inti=0;i{
kinds[j][i]=0;
}
}
while(q)
{
if(strcmp("OutLook",atrribute)==0)
{
for(inti=0;i{
if(strcmp(q->OutLook,OutLook_kind[i])==0)
{
kinds[0][i]++;
if(strcmp(q->PlayTennis,"Yes")==0)
kinds[1][i]++;
else
kinds[2][i]++;
}
}
}
elseif(strcmp("Temperature",atrribute)==0)
{
for(inti=0;i{
if(strcmp(q->Temperature,Temperature_kind[i])==0)
{
kinds[0][i]++;
if(strcmp(q->PlayTennis,"Yes")==0)
kinds[1][i]++;
else
kinds[2][i]++;
}
}
}
elseif(strcmp("Humidity",atrribute)==0)
{
for(inti=0;i{
if(strcmp(q->Humidity,Humidity_kind[i])==0)
{
kinds[0][i]++;
if(strcmp(q->PlayTennis,"Yes")==0)
kinds[1][i]++;//
else
kinds[2][i]++;
}
}
}
elseif(strcmp("Wind",atrribute)==0)
{
for(inti=0;i{
if(strcmp(q->Wind,Wind_kind[i])==0)
{
kinds[0][i]++;
if(strcmp(q->PlayTennis,"Yes")==0)
kinds[1][i]++;
else
kinds[2][i]++;
}
}
}
q=q->next;
}
//计算信息增益
double*gain_kind=newdouble[atrr_kinds];
intpositive_kind=0,negative_kind=0;
for(intj=0;j{
if(kinds[0][j]!
=0&&kinds[1][j]!
=0&&kinds[2][j]!
=0)
{
p1=1.0*kinds[1][j]/kinds[0][j];
p2=1.0*kinds[2][j]/kinds[0][j];
gain_kind[j]=-p1*log(p1)/log2-p2*log(p2)/log2;
gain=gain-(1.0*kinds[0][j]/(positive+negative))*gain_kind[j];
}
else
gain_kind[j]=0;
}
returngain;
}
//在ID3算法中的训练样本子集合与属性子集合的链表需要进行清空
voidFreeLink(link&Link)
{
linkp,q;
p=Link->next;
Link->next=NULL;
while(p)
{
q=p;
p=p->next;
free(q);
}
}
voidID3(tree&T,linkL,linkTarget_Attr,Attributesattr)
{
Attributesp,max,attr_child,p1;
linkq,link_child,q1;
treer,tree_p;
intpositive=0,negative=0;
PN_Num(L,positive,negative);
//初始化两个子集合
attr_child=newAttrNode;
attr_child->next=NULL;
link_child=newLNode;
link_child->next=NULL;
if(positive==0)//全是反例
{
strcpy(T->data,"No");
return;
}
elseif(negative==0)//全是正例
{
strcpy(T->data,"Yes");
return;
}
p=attr->next;//属性链表
doublegain,g=0;
/************************************************************************/
/*建立属性子集合与训练样本子集合有两个方案:
一:
在原来链表的基础上进行删除;
二:
另外申请空间进行存储子集合;
采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间的应用混乱
*/
/************************************************************************/
if(p)
{
while(p)
{
gain=Gain(positive,negative,p->attributes,L,attr);
cout<attributes<<""<if(gain>g)
{
g=gain;
max=p;//寻找信息增益最大的属性
}
p=p->next;
}
strcpy(T->data,max->attributes);//增加决策树的节点
cout<<"信息增益最大的属性:
max->attributes="<attributes<//下面开始建立决策树
//创建属性子集合
p=attr->next;
while(p)
{
if(strcmp(p->attributes,max->attributes)!
=0)
{
p1=newAttrNode;
strcpy(p1->attributes,p->attributes);
p1->attr_Num=p->attr_Num;
p1->next=NULL;
p1->next=attr_child->next;
attr_child->next=p1;
}
p=p->next;
}
//需要区分出是哪一种属性
//建立每一层的第一个节点
if(strcmp("OutLook",max->attributes)==0)
{
r=newTNode;
r->firstchild=r->nextsibling=NULL;
strcpy(r->weight,OutLook_kind[0]);
T->firstchild=r;
//获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child
q=L->next;
while(q)
{
if(strcmp(q->OutLook,OutLook_kind[0])==0)
{
q1=newLNode;
strcpy(q1->OutLook,q->OutLook);
strcpy(q1->Humidity,q->Humidity);
strcpy(q1->Temperature,q->Temperature);
strcpy(q1->Wind,q->Wind);
strcpy(q1->PlayTennis,q->PlayTennis);
q1->next=NULL;
q1->next=link_child->next;
link_child->next=q1;
}
q=q->next;
}
}
elseif(strcmp("Temperature",max->attributes)==0)
{
r=newTNode;
r->firstchild=r->nextsibling=NULL;
strcpy(r->weight,Temperature_kind[0]);
T->firstchild=r;
//获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child
q=L->next;
while(q)
{
if(strcmp(q->Temperature,Temperature_kind[0])==0)
{
q1=newLNode;
strcpy(q1->OutLook,q->OutLook);
strcpy(q1->Humidity,q->Humidity);
strcpy(q1->Temperature,q->Temperature);
strcpy(q1->Wind,q->Wind);
strcpy(q1->PlayTennis,q->PlayTennis);
q1->next=NULL;
q1->next=link_child->next;
link_child->next=q1;
}
q=q->next;
}
}
elseif(strcmp("Humidity",max->attributes)==0)
{
r=newTNode;
r->firstchild=r->nextsibling=NULL;
strcpy(r->weight,Humidity_kind[0]);
T->firstchild=r;
//获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child
|