西电数字挖掘上机报告文档格式.docx
《西电数字挖掘上机报告文档格式.docx》由会员分享,可在线阅读,更多相关《西电数字挖掘上机报告文档格式.docx(27页珍藏版)》请在冰豆网上搜索。
Ck=apriori-gen(Lk-1);
//新的候选集
(4)
forall transactions t∈D do begin
(5)
Ct=subset(Ck,t);
//事务t中包含的候选集
(6)
forall candidates c∈Ct do
(7)
c.count++;
(8)
end
(9)
Lk={c∈Ck|c.count≥minsup}
(10)
end
(11)
Answer=∪kLk;
1)编程实现算法
1.Item.h源文件
#include<
set>
usingnamespacestd;
/// One node of the itemset trie.
///
/// A node carries an item id, a support counter and an optional pointer to
/// the set of child nodes. The counter and the child pointer are `mutable`
/// so that they can be updated through the const references handed out by
/// `std::set<Item>` iterators; only `id` takes part in the ordering, so
/// mutating them never disturbs the set's invariants.
class Item
{
public:
  /// Creates a node for item `i` with zero support and no children.
  Item(int i) : id(i), support(0), children(0) {}

  /// Copies id and support; the child pointer is copied as-is (shallow),
  /// so the copy refers to the same child set as `i`.
  Item(const Item &i) : id(i.id), support(i.support), children(i.children) {}

  /// Does not release the child set; see deleteChildren().
  ~Item() {}

  /// Returns the item id.
  int getId() const { return id; }

  /// Adds `inc` (default 1) to the support counter and returns the new value.
  int Increment(int inc = 1) const { return support += inc; }

  /// Defined outside this class body (not visible in this listing).
  std::set<Item> *makeChildren() const;

  /// Defined outside this class body (not visible in this listing).
  int deleteChildren() const;

  /// Returns the current support counter.
  int getSupport() const { return support; }

  /// Returns the child set pointer, or 0 when none has been attached.
  std::set<Item> *getChildren() const { return children; }

  /// Orders nodes by item id only.
  bool operator<(const Item &i) const { return id < i.id; }

private:
  const int id;
  mutable int support;
  mutable std::set<Item> *children;
};
2.AprioriRules.h源文件
/// A frequent itemset: `length` item ids stored in the owned array `t`,
/// together with its support count.
///
/// The members stay public because the rest of the program accesses
/// `length`, `t` and `support` directly.
class Itemset
{
public:
  /// Allocates room for `l` item ids (zero-filled). `support` starts at 0;
  /// it was previously left uninitialized.
  Itemset(int l) : length(l), t(new int[l]()), support(0) {}

  /// Deep copy: the new object gets its own array.
  Itemset(const Itemset &is) : length(is.length), t(new int[is.length]), support(is.support)
  {
    for (int i = 0; i < length; i++) t[i] = is.t[i];
  }

  /// Deep copy assignment. Without it the compiler-generated version would
  /// copy the raw pointer, leaking the old array and double-deleting the
  /// shared one.
  Itemset &operator=(const Itemset &is)
  {
    if (this != &is) {
      // Allocate first so *this is untouched if the allocation throws.
      int *fresh = new int[is.length];
      for (int i = 0; i < is.length; i++) fresh[i] = is.t[i];
      delete[] t;
      t = fresh;
      length = is.length;
      support = is.support;
    }
    return *this;
  }

  ~Itemset() { delete[] t; }

  int length;
  int *t;
  int support;
};
classAprioriRules
AprioriRules();
~AprioriRules();
voidsetData(char*fn);
intsetOutputRules(char*fn);
voidsetMinConf(floatmc){minconf=mc;
intgenerateRules();
voidsetMaxHead(intm){maxhead=m;
voidsetVerbose(){verbose=true;
private:
Itemset*getNextSet();
intgenerateRules(set<
*current,int*iset,intdepth);
intprocessSet(set<
*items,intsl,int*iset,intsup,int*head,intspos,intdepth);
Item*trie;
floatminconf;
intmaxhead;
ofstreamrulesout;
FILE*data;
boolverbose;
3.AprioriRules.cpp源文件
iostream>
fstream>
stdio.h>
vector>
time.h>
#include"
Item.h"
AprioriRules.h"
/// Creates a rule generator with no input file, zero thresholds, quiet
/// output and an empty trie whose root node has item id 0.
AprioriRules::AprioriRules()
  : trie(new Item(0)),  // initializers follow the member declaration order
    minconf(0),
    maxhead(0),
    data(0),
    verbose(false)
{
}
~AprioriRules()
if(data)fclose(data);
if(trie){
trie->
deleteChildren();
deletetrie;
voidAprioriRules:
setData(char*fn)
data=fopen(fn,"
rt"
);
intAprioriRules:
setOutputRules(char*fn)
rulesout.open(fn);
if(!
rulesout.is_open()){
cerr<
<
"
error:
couldnotopen"
<
fn<
endl;
return-1;
return0;
/// Reads the next non-empty line of `data` and turns it into an Itemset.
///
/// A line is a sequence of non-negative decimal numbers separated by any
/// non-digit characters. The last number on the line is the support; the
/// numbers before it are the item ids. Lines without any number are skipped.
///
/// @return a heap-allocated Itemset that the caller must delete, or 0 when
///         no input file is open or the input is exhausted.
///
/// Changes from the listed version:
///  - the getc() result is kept in an int so EOF is distinguishable from
///    character data, and the loop also stops on a read error (the feof()
///    test never becomes true in that case, so the old loop never ended);
///  - a final line that lacks a trailing newline is no longer discarded;
///  - blank lines are skipped with a loop instead of recursion;
///  - a null `data` returns 0 instead of being passed to getc().
Itemset *AprioriRules::getNextSet()
{
  if (!data) return 0;

  for (;;) {
    std::vector<int> list;
    int c;

    // Collect every number on the current line.
    do {
      int item = 0, pos = 0;
      c = getc(data);
      while (c >= '0' && c <= '9') {
        item = item * 10 + (c - '0');
        c = getc(data);
        pos++;
      }
      if (pos) list.push_back(item);
    } while (c != '\n' && c != EOF);

    if (list.empty()) {
      if (c == EOF) return 0;  // nothing left to read
      continue;                // blank line: try the next one
    }

    // The last number is the support, the rest are the items.
    const int size = int(list.size()) - 1;
    Itemset *t = new Itemset(size);
    t->support = list[size];
    for (int i = 0; i < size; i++) t->t[i] = list[i];
    return t;
  }
}
generateRules()
intsize=0;
clock_tstart;
//Readallfrequentitemsets
if(verbose)cout<
readingfrequentitemsets"
flush;
start=clock();
while(Itemset*t=getNextSet()){
iteratorit;
*items=trie->
makeChildren();
for(intdepth=0;
depth<
depth++){
it=items->
find(Item(t->
t[depth]));
if(it==items->
end())it=items->
insert(Item(t->
t[depth])).first;
items=it->
if(t->
length)it->
Increment(t->
support);
elsetrie->
size=(t->
length>
size?
length:
size);
deletet;
["
(clock()-start)/double(CLOCKS_PER_SEC)<
s]"
endl<
//generaterules
generatingrules"
int*iset=newint[size];
intadded=generateRules(trie->
getChildren(),iset,1);
delete[]iset;
returnadded;
generateRules(set<
*current,int*iset,intdepth)
if(current==0)return0;
intadded=0;
for(set<
iteratorrunner=current->
begin();
runner!
=current->
end();
runner++){
iset[depth-1]=runner->
getId();
if(depth>
1){
int*tmp=newint[depth];
added+=processSet(