数据挖掘第三次实验报告Word格式文档下载.docx
《数据挖掘第三次实验报告Word格式文档下载.docx》由会员分享,可在线阅读,更多相关《数据挖掘第三次实验报告Word格式文档下载.docx(13页珍藏版)》请在冰豆网上搜索。
 * purposes and as a root class for experimenting on
 * optimizations.
 *
 * In the latest version the use of DataHandler is added for reading
 * the database.
 * @author Michael Holler
 * @version 0.8, 16.03.2004
 */
publicclassApriori{
intpass;
//numberofpasses
inttotal;
//totalnumberoffrequentitemsets
intminsup;
//minimalsupportofitemset
Stringfilename;
//thefilenameofthedatabase
Itemroot;
//therootitemoftheTrie
BufferedWriterwriter;
//thebuffertowritetheoutputto
DataHandlerdh;
//thehandlerforthedatabase
/**
 * Default constructor for creating an Apriori object.
 * Uses a minimal support of 4, reads the database through a
 * DataHandler created for "test.dat" and starts from an empty trie
 * whose root item is created with label 0.
 */
public Apriori() {
    this.pass = 0;
    this.minsup = 4;
    this.dh = new DataHandler("test.dat");
    this.root = new Item(0);
}
/**
 * Constructor for creating an Apriori object with parameters.
 * @param filename the name of the database file
 * @param minsup the minimal support threshold
 * @param outfile the name of the output file; when it is empty (or the
 *                file cannot be opened) no writer is created
 */
public Apriori(String filename, int minsup, String outfile) {
    this.pass = 0;
    this.minsup = minsup;
    this.dh = new DataHandler(filename);
    // the trie root must exist before any pass, exactly as in the default constructor
    this.root = new Item(0);
    try {
        if (outfile != null && !outfile.equals("")) {
            writer = new BufferedWriter(new FileWriter(outfile));
        }
    } catch (Exception ignored) {
        // best effort: without an output file the writer simply stays null
    }
}
/**
 * Constructor for creating an Apriori object with parameters.
 * This one is used with other mining algorithms.
 * @param minsup the minimal support threshold
 * @param datahandler the handler for the database
 */
public Apriori(int minsup, DataHandler datahandler) {
    this.pass = 0;
    this.minsup = minsup;
    this.dh = datahandler;
    // the trie root must exist before any pass, exactly as in the default constructor
    this.root = new Item(0);
}
*Theworkhorsemethodforthebasicimplementationof
*theApriorialgorithm.
publicvoidfindFrequentSets(){
booleanrunning=true;
intcandidates=0,transactions=0,pruned=0,itemsets;
while(running){
this.pass++;
candidates=this.generateCandidates(this.root,newVector(),1);
transactions=this.countSupport();
pruned=this.pruneCandidates(this.root);
itemsets=candidates-pruned;
//correctthecandidatecountonfirstpassforprinting
if(this.pass==1)
candidates=total;
total+=itemsets;
if(itemsets<
=this.pass&
&
this.pass>
1){
running=false;
}
System.out.println("
pass:
"
+this.pass+
"
total:
+total+
candidates:
+candidates+
pruned:
+pruned);
*Methodforgeneratingnewcandidates.
*Copiesthesiblingsofanitemtoitschildren.
*@paramitemtheitemtowhichgenerateditemsareadded
*@paramdepththedepthofrecursion
*@returnthenumberofnewcandidatesgenerated
publicintgenerateCandidates(Itemitem,Vectorcurrent,intdepth){
Vectorv=item.getChildren();
Itemchild=item;
intgenerated=0;
for(Enumeratione=v.elements();
e.hasMoreElements();
){
child=(Item)e.nextElement();
current.add(child);
if(depth==this.pass-1){
generated+=this.copySiblings(child,v,current);
}else{
generated+=this.generateCandidates(child,current,depth+1);
current.remove(child);
returngenerated;
*MethodforcopyingthesiblingsofanItemtoitschildren.
*@paramitemtheitemtowhichthesiblingsarecopied
*@paramsiblingsthesiblingstobecopied
*@paramcurrentthecurrentitemsettobegenerated
*@returnthenumberofsiblingscopied
publicintcopySiblings(Itemitem,Vectorsiblings,Vectorcurrent){
Enumeratione=siblings.elements();
Itemparent=item;
Itemsibling=newItem();
intcopied=0;
while(sibling.getLabel()<
parent.getLabel()&
e.hasMoreElements()){
sibling=(Item)e.nextElement();
while(e.hasMoreElements()){
current.add(sibling);
if(this.pass<
=2||this.checkSubsets(current,this.root.getChildren(),0,1)){
parent.addChild(newItem(sibling.getLabel()));
copied++;
current.remove(sibling);
returncopied;
*Checksifthesubsetsoftheitemsettobegeneratedareallfrequent.
*@paramcurrentthecurrentitemsettobegenerated
*@paramchildrenthechildreninthetrieonthisdepth
*@parammarkthemarkinthecurrentitemset
*@paramdepthdepthofrecursion
*@returntrueifthesubsetsarefrequent,elsefalse
publicbooleancheckSubsets(Vectorcurrent,Vectorchildren,intmark,intdepth){
booleanok=true;
Itemchild;
intindex;
inti=depth;
if(children==null)returnfalse;
while(ok&
(mark<
=i)){
index=children.indexOf(current.elementAt(i));
if(index>
=0){
if(depth<
this.pass-1){
child=(Item)children.elementAt(index);
ok=checkSubsets(current,child.getChildren(),i+1,depth+1);
ok=false;
i--;
returnok;
*Methodforcountingthesupportsofthecandidates
*generatedonthispass.
*@returnthenumberoftransactionsfromwhich
*thesupportwascounted
publicintcountSupport(){
introwcount=0;
int[]items;
this.dh.open();
for(items=this.dh.read();
items.length>
0;
items=this.dh.read()){
rowcount++;
if(this.pass==1){
this.root.incSupport();
this.total+=generateFirstCandidates(items);
countSupport(root,items,0,1);
returnrowcount;
*Methodgeneratesthefirstcandidatesbyaddingeachitem
*foundinthedatabasetothechildrenoftherootitem.Also
*countsthesupportsoftheitemsfoundinthedatabase.
*@paramitemsthearrayofintegeritemsfromthedatabase
*@returnthenumberofcandidatesgenerated
publicintgenerateFirstCandidates(int[]items){
Vectorv=root.getChildren();
Enumeratione=v.elements();
Itemitem=newItem();
for(inti=0;
i<
items.length;
i++){
while(e.hasMoreElements()&
item.getLabel()<
items[i]){
item=(Item)e.nextElement();
if(item.getLabel()==items[i]){
item.incSupport();
if(e.hasMoreElements())
}elseif(item.getLabel()>
intindex=v.indexOf(item);
Itemchild=newItem(items[i]);
child.incSupport();
this.root.addChild(child,index);
generated++;
}else{
this.root.addChild(child);
*AddsthecoveroftheItemgivenasparamaterandallthe
*ItemsinTriebelowit.
*@paramitemtheitemthecoverofwhichistobecounted
*@paramithepositioninthearray
*@paramdepththedepthofrecursion
publicvoidcountSupport(Itemitem,int[]items,inti,intdepth){
inttmp;
//loopthroughthechildrentocheck
//break,ifthewholetransactionischecked
if(i==items.length){break;
//doalinearsearchforthechildinthetransactionstartingfromi
tmp=i;
while(tmp<
items.length&
items[tmp]<
child.getLabel())tmp++;
//ifthesameitemexists,increasesupportorgodeaper
if(tmp<
child.getLabel()==items[tmp]){
if(depth==this.pass){
countSupport(child,items,tmp+1,depth+1);
i=tmp+1;
*Methodforpruningthecandidates.Removesitemsthatare
*notfrequentfromtheTrie.
*@paramitemtheitemthechildrenofwhichwillbepruned
*@returnthenumberofitemsprunedfromthecandidates
publicintpruneCandidates(Itemitem){
intpruned=0;
for(Enumeratione=newVector(v).elements();
//checkinfrequency,existenceandthatitisfullycounted
if(child.getSupport()<
this.minsup){
v.remove(child);
pruned++;
pruned+=pruneCandidates(child);
returnpruned;
*Methodgetsandreturnstherootofthe
*candidatetrie.
*@returntherootofthecandidatetrie
publicItemgetTrie(){
returnthis.root;
/**
 * Method prints the itemsets to the system output and to a file
 * if the name of an output file exists.
 */
// NOTE(review): this listing has lost its whitespace and several lines
// (string delimiters, closing braces, possibly a println call); it does not
// compile as shown and should be restored from the original source.
// What is visible: a guard on the writer being non-null, a call to
// print(root, ...) and an expression that appends this.total to a
// "number of frequent itemsets found" message.
publicvoidprintFrequentSets(){
if(this.writer!
=null){
print(root,"
\nnumberoffrequentitemsetsfound:
+this.total);
/**
 * Loops through the Trie recursively adding
 * paths and subpaths to the output string along the way.
 * @param item the item where the recursion is
 * @param str the string of the gathered itemset
 */
publicvoidprint(Itemitem,Stringstr){
this.writer.write(str+item.getLabel()
+"
("
+item.getSupport()+"
)\n"
this.writer.flush();
}catch(Exceptionx){
nooutputfile"
if(item.hasChildren()){
print(item,str+item.getLabel()+"