ID3算法源程序.docx

上传人:b****5 文档编号:3720305 上传时间:2022-11-24 格式:DOCX 页数:13 大小:18.79KB
下载 相关 举报
ID3算法源程序.docx_第1页
第1页 / 共13页
ID3算法源程序.docx_第2页
第2页 / 共13页
ID3算法源程序.docx_第3页
第3页 / 共13页
ID3算法源程序.docx_第4页
第4页 / 共13页
ID3算法源程序.docx_第5页
第5页 / 共13页
点击查看更多>>
下载资源
资源描述

ID3算法源程序.docx

《ID3算法源程序.docx》由会员分享,可在线阅读,更多相关《ID3算法源程序.docx(13页珍藏版)》请在冰豆网上搜索。

ID3算法源程序.docx

ID3算法源程序

ID3算法的源程序(C语言)2005-3-25

PROTO.H

ENTROPYNEGnegentropy(REAL**,UINT,NODE*,UINT);

voidprint_tree(NODE*,CHAR**);

voidfree_tree(NODE  *);

NODE*ID3(MATRIX*,NODE*,UINT,UINT);

voiderr_exit(CHAR*,UINT);

MATRIX*build_matrix(UINT,UINT);

voidfree_matrix(MATRIX*);

voidread_matrix(CHAR*,MATRIX*);

voidfile_size(CHAR*,UINT*,UINT*);

CHAR**read_tags(CHAR*,UINT);

voidfree_tags(CHAR**,UINT);

ID3.h

/* Scalar aliases used throughout the ID3 sources. */
typedef unsigned int  UINT;
typedef unsigned long ULONG;
typedef char          CHAR;
typedef unsigned char BOOL;
typedef double        REAL;

/* One node of the binary decision tree. */
typedef struct node {
    UINT idx;            /* ID code for attribute */
    REAL threshold;      /* Numerical threshold for attribute test */
    struct node *on;     /* Address of 'on' node */
    struct node *off;    /* Address of 'off' node */
    struct node *parent; /* Address of parent node */
} NODE;

/* Value returned by negentropy(). */
typedef struct ne_struct {
    REAL ne;     /* negentropy value */
    UINT status; /* compared against INACTIVE / OFF / ON by ID3() */
} NEGENTROPY;

/* Table of REAL values stored as an array of row pointers. */
typedef struct matrix {
    UINT width;  /* entries per row */
    UINT height; /* number of rows */
    REAL **data; /* data[row][column] */
} MATRIX;

/*
 * Node/branch states.  The enum tag shares its spelling with the UINT
 * typedef; this is legal C because tags live in a separate name space.
 */
enum UINT { INACTIVE, OFF, ON };

/* Natural logarithm of 2, used to convert log() results to base 2. */
#define LN_2 0.693147180559945309417

/*
 * x * log2(x) for x > 0, otherwise 0.0.  Requires <math.h> for log().
 * The argument is fully parenthesised so that expressions such as
 * entropy(a + b) expand correctly; note that it is evaluated more than
 * once, so it must not have side effects.
 */
#define entropy(x) ((x) > 0 ? (x) * log(x) / LN_2 : 0.0)

/*
 * FILE: id3.c
 *
 * Author: Andrew Colin
 *
 * DISCLAIMER: No liability is assumed by the author for any use made
 * of this program.
 *
 * DISTRIBUTION: Any use may be made of this program, as long as the
 * clear acknowledgment is made to the author in code and runtime
 * executables
 */

/*
 * NOTE(review): the names of the original angle-bracket headers were lost
 * when this listing was extracted.  The headers below are the standard
 * ones required by the visible code -- confirm against the original source.
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "id3.h"

#include "proto.h"

/*-------------------------------------------------------------------*/

MATRIX*build_matrix(UINTwidth,UINTheight)

{

    MATRIX*_matrix;

    UINTi;

    _matrix=(MATRIX*)malloc(sizeof(MATRIX));

    if(!

_matrix)

        err_exit(__FILE__,__LINE__);

    _matrix->width  =width;

    _matrix->height=height;

    _matrix->data=(REAL**)malloc(height*sizeof(REAL*));

    if(_matrix->data==NULL)

        err_exit(__FILE__,__LINE__);

    for(i=0;i

    {

        _matrix->data[i]=(REAL*)malloc(width*sizeof(REAL));

        if(_matrix->data[i]==NULL)

            err_exit(__FILE__,__LINE__);

    }

    return_matrix;

}

/*-------------------------------------------------------------------*/

/*

*Standarderrorhandlerfunction

*/

voiderr_exit(CHAR*file,UINTline)

{

    printf("\nFatalerrorinfile%s,line%u",file,line);

    exit(0);

}

/*-------------------------------------------------------------------*/

/* Counts the comma-separated fields in `line` (the line is modified by strtok). */
static UINT count_fields(CHAR *line)
{
    UINT n = 0;
    CHAR *tok;

    for (tok = strtok(line, ","); tok != NULL; tok = strtok(NULL, ","))
        ++n;

    return n;
}

/*
 * Given the name of a file of numeric data, this routine counts
 * the numbers of rows and columns.  It's assumed that the number
 * of entries is the same in each row, and an error is flagged if this
 * is not the case.
 *
 *   file_name - path of the comma-separated data file
 *   width     - receives the number of entries in the first row
 *   height    - receives the number of rows (lines holding more than a
 *               single character after the first row)
 *
 * Failure to open the file, or a row whose entry count differs from the
 * first row, is reported through err_exit().
 *
 * NOTE: lines are read in pieces of at most 0xFF - 1 characters, so a
 * longer line is seen as several rows.
 */
void file_size(CHAR *file_name, UINT *width, UINT *height)
{
    FILE *f;
    UINT buf_size = 0xFF, row_width;
    CHAR *buffer;

    *width = *height = 0;

    buffer = malloc(buf_size * sizeof *buffer);
    if (buffer == NULL)
        err_exit(__FILE__, __LINE__);

    /* Open data file - abort if filename invalid */
    f = fopen(file_name, "r");
    if (f == NULL)
    {
        printf("\nFile not found: %s\n", file_name);
        err_exit(__FILE__, __LINE__);
    }

    /* Get number of entries in first row */
    if (fgets(buffer, (int)buf_size, f) != NULL)
    {
        ++*height;
        *width = count_fields(buffer);
    }

    /*
     * Count numbers of subsequent rows.  Looping on the result of fgets()
     * (rather than on feof()) also terminates when a read error occurs.
     */
    while (fgets(buffer, (int)buf_size, f) != NULL)
    {
        if (strlen(buffer) > strlen("\n")) /* if line is more than a NL char */
        {
            ++*height;
            row_width = count_fields(buffer);

            if (*width != row_width)
            {
                printf("\nNumber of entries in file %s did not agree", file_name);
                err_exit(__FILE__, __LINE__);
            }
        }
    }

    fclose(f);
    free(buffer);
}

/*-------------------------------------------------------------------*/

/*
 * Releases a matrix: every row, then the row-pointer array, then the
 * MATRIX structure itself.  A NULL argument is ignored.
 */
void free_matrix(MATRIX *_matrix)
{
    UINT row;

    if (_matrix == NULL)
        return;

    if (_matrix->data != NULL)
    {
        for (row = 0; row < _matrix->height; row++)
            free(_matrix->data[row]);

        free(_matrix->data);
    }

    free(_matrix);
}

/*-------------------------------------------------------------------*/

voidfree_tags(CHAR**varname,UINTwidth)

{

    UINTi;

    for(i=0;i

        free(varname[i]);

    free(varname);

}

/*-------------------------------------------------------------------*/

/*
 * Frees the memory allocated to a tree structure.
 *
 * Both sub-trees ('on' and 'off') are released before the node itself.
 * A NULL node is ignored, which also ends the recursion at the leaves.
 */
void free_tree(NODE *node)
{
    if (node == NULL)
        return;

    free_tree(node->on);
    free_tree(node->off);
    free(node);
}

/*-------------------------------------------------------------------*/

NODE*ID3(MATRIX*matrix,NODE*parent,UINTtarget,UINTstate)

/*Routinetobuildadecisiontree,basedonQuinlan'sID3algorithm.*/

{

    NEGENTROPYnegentropy_struct;

    NODE*node;

    UINTn_vars=matrix->width,n_samples=matrix->height,i,j,split;

    REAL**data=matrix->data;

    REALbest_threshold,min_negentropy,_negentropy;

    /*Allocatememoryforthisnode*/

    node=(NODE*)malloc(sizeof(NODE));

    if(!

node)

        err_exit(__FILE__,__LINE__);

    /*Setuplinksindecisiontree*/

    node->parent=parent;  /*Setaddressofparentnode*/

    if(parent!

=NULL)/*parenttochild;notrelevantforrootnode*/

    {

        /*Passaddressofthisnodetotheparentnode*/

        if(state==ON)

            parent->on=node;

        else

            if(state==OFF)

                parent->off=node;

    }

    /*

    *Selectattributewithlowestnegentropyforsplitting.Scanthrough

    *ALLattributes(exceptthetarget)andALLdatasamples.Thisis

    *prettyinefficientfordatasetswithrepeatedvalues,butwilldo

    *forillustrativepurposes

    */

    min_negentropy=1.0;

    for(i=0;i

    {

        for(j=0;j

        {

            if(i!

=target)

            {

                /*Settrialvaluesforthisnode...*/

                node->idx=i;

                node->threshold=data[j][i];

                /*...andcalculatethenegentropyofthispartition*/

                negentropy_struct=negentropy(data,n_samples,node,target);

                _negentropy=negentropy_struct.ne;

                /*Ifthisnegentropyislowerthananyother,retainthe

                      indexandthresholdforfutureuse*/

                if(_negentropy

                {

                    min_negentropy=_negentropy;

                    split=i;

                    best_threshold=data[j][i];

                }

            }/*if(i!

=target)*/

        }/*for(j=0;j

    }/*for(i=0;i

    /*Savethecombinationofbestattributeandthresholdvalue*/

    node->idx=split;

    node->threshold=best_threshold;

    /*

    *Ifthenegentropyroutinefounditselfatanend-of-branch

    *forthedecisiontree,the'status'flagin'negentropy_struct'

    *issettoONorOFFandthenodelabelledaccordingly.Otherwise,

    *ID3continuestocallitselfuntilallend-of-branchnodesare

    *found.

    */

    if  (negentropy_struct.status!

=INACTIVE)

    {

        node->on=node->off=NULL;

        node->idx=negentropy_struct.status;

    }

    else

    {

        node->on  =ID3(matrix,node,target,ON);

        node->off=ID3(matrix,node,target,OFF);

    }

    returnnode;

}

/*-------------------------------------------------------------------*/

voidmain(intargv,char*argc[])

{

    MATRIX*matrix;

    NODE*node;

    UINTtarget,n_vars,n_samples;

    CHARdata_file[13],tag_file[13];  /*LongestfilenameinDOS*/

    CHAR**tag_names;

    /*Setupfilenames*/

    if(argv!

=2)

    {

        printf("\nUsage:

id3[datafile]");

        exit(0);

    }

    else

    {

        printf("\nWelcometoID3");

        printf("\nLastcompiledon%s,%s",__DATE__,__TIME__);

        printf("\n");

        strcpy(data_file,argc[1]);

        strcpy(tag_file,  argc[1]);

        strcat(data_file,".dat");

        strcat(tag_file,  ".tag");

    }

    /*Readdimensionsofdatafile*/

    file_size(data_file,&n_vars,&n_samples);

    /*Readlabelsforcolumnsofdata*/

    tag_names=read_tags(tag_file,n_vars);

    /*Allocatestoragefordata...*/

    matrix=build_matrix(n_vars,n_samples);

    /*...andreaditfromdisk*/

    read_matrix(data_file,matrix);

    /*Classificationtargetislastcolumn*/

    target=n_vars-1;

    /*Returnrootofdecisiontree-ID3continuestocallitself

      recursively*/

    node=ID3(matrix,NULL,target,0);

    print_tree(node,tag_names);

    printf("\n");

    free_tags(tag_names,n_vars);

    free_matrix(matrix);

    free_tree(node);

}

/*-------------------------------------------------------------------*/

NEGENTROPYnegentropy(REAL**data,

    UINT  n_samples,

    NODE  *local,

    UINT  target)

{

    /*

    *Calculatestheentropyofclassif

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 小学教育 > 语文

copyright@ 2008-2022 冰豆网网站版权所有

经营许可证编号:鄂ICP备2022015515号-1