Bipartite-Graph Implementation of the HITS Algorithm
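The class below builds a bipartite graph between facet words (nouns) and emotion words drawn from a segmented, POS-tagged corpus, and ranks the nodes with the HITS algorithm. Restating the update rule from the comments in doHITS(): a node's authority score is the sum of its neighbours' hub scores, a node's hub score is the sum of its neighbours' authority scores, and after each pass both score vectors are divided by their current maximum (the "new normalization" in the code). For a node $v$ with neighbour set $N(v)$:

\[
a(v) \leftarrow \sum_{u \in N(v)} h(u), \qquad
h(v) \leftarrow \sum_{u \in N(v)} a(u), \qquad
a(v) \leftarrow \frac{a(v)}{\max_{w} a(w)}, \qquad
h(v) \leftarrow \frac{h(v)}{\max_{w} h(w)} .
\]

Iteration stops when every score changes by less than 0.00001 between passes, or after 1000 passes; finalScore() then multiplies the converged scores by log10 of each word's corpus frequency.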

package extrcting;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;

public class HITS
{
    // public ArrayList facet;
    public HashMap<String, ArrayList<String>> graphFacet; // no need for a HashSet here, since the lists are traversed in full anyway
    public HashMap<String, ArrayList<String>> graphEmo;
    public HashMap<String, Double> authScore;   // authority score of each node
    public HashMap<String, Double> centerScore; // hub (centrality) score of each node
    public HashMap<String, Double> edgeGraph;   // edge information, not used for now
    public HashMap<String, Integer> wordLex;    // word table recording how often each word occurs
    public HashSet<String> Lexicon;
    public HashMap<String, Double> facetScoreA;
    public HashMap<String, Double> facetScoreC;
    public HashMap<String, Double> emoScoreA;
    public HashMap<String, Double> emoScoreC;
    public final String data = "20110317";
    public Integer nounCount = 0;
    public Integer emoCount = 0;
    public Integer nounCountEmo = 0;
    public Integer emoCountEmo = 0;
    public int rowNumber;        // total number of lines
    public int rowNumberEmo = 0; // number of lines containing emotion words

    public HITS() throws IOException // constructor
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("Lexicon.txt")));
        String words = "";
        rowNumber = 0;
        Lexicon = new HashSet<String>(5000);
        authScore = new HashMap<String, Double>(10000);   // authority score of each node
        centerScore = new HashMap<String, Double>(10000); // hub score of each node
        graphFacet = new HashMap<String, ArrayList<String>>();
        graphEmo = new HashMap<String, ArrayList<String>>();
        wordLex = new HashMap<String, Integer>();
        facetScoreA = new HashMap<String, Double>(5000);
        facetScoreC = new HashMap<String, Double>(5000);
        emoScoreA = new HashMap<String, Double>(5000);
        emoScoreC = new HashMap<String, Double>(5000);
        while ((words = reader.readLine()) != null)
        {
            Lexicon.add(words + "/n");
        }
        extract();
        // doHITS();
        // finalScore();
        // double averageNoun = nounCount / (double) rowNumber;
        // double averageEmo = emoCount / (double) rowNumber;
        // double averageNounEmo = nounCountEmo / (double) rowNumberEmo;
        // double averageEmoEmo = emoCountEmo / (double) rowNumberEmo;
        // System.out.println("名词个数" + nounCount);
        // System.out.println("情感词个数" + emoCount);
        // System.out.println("平均名词个数" + averageNoun);
        // System.out.println("平均情感词个数" + averageEmo);
        // System.out.println("所有行数" + rowNumber);
        // System.out.println("有情感的行数" + rowNumberEmo);
        // System.out.println("情感句平均名词个数" + averageNounEmo);
        // System.out.println("情感句平均情感词个数" + averageEmoEmo);
    }

    public void doHITS() throws IOException
    {
        int iterOver = 0;          // flag marking the end of iteration
        final int iterTime = 1000; // maximum number of iterations
        int count = 0;             // iteration counter
        // File log = new File("E:\\HITSlog"); // the log was mainly used to inspect the entries related to "盐"
        // if (log.exists())
        // {
        //     log.delete();
        // }
        // log.createNewFile();
        // FileWriter write = new FileWriter(log);
        // BufferedWriter bw = new BufferedWriter(write);
        int codeNumber = authScore.size();
        // while (iterOver != 1)
        while (iterOver != 1 && count < iterTime)
        {
            double authMax = .0;
            double centerMax = .0; // for the new normalization scheme
            double authSum = .0;
            double centerSum = .0; // for normalization
            HashMap<String, Double> authScoreLast = new HashMap<String, Double>(authScore);
            // experiments showed the copy must be created with new; otherwise only the reference is stored
            HashMap<String, Double> centerScoreLast = new HashMap<String, Double>(centerScore);
            System.out.println("第" + count + "迭代" + "节点数" + codeNumber + "个");
            Iterator iteratorFacet = graphFacet.keySet().iterator(); // traverse the facet side
            while (iteratorFacet.hasNext())
            {
                String sFacet = "";
                sFacet = (String) iteratorFacet.next();
                ArrayList<String> iterEmoArray = new ArrayList<String>(graphFacet.get(sFacet));
                Double authScoreArray = .0;
                Double centerScoreArray = .0;
                for (int i = 0; i < iterEmoArray.size(); i++)
                /*
                 * a node's hub score is the sum of its neighbours' authority scores
                 * a node's authority score is the sum of its neighbours' hub scores
                 */
                {
                    // if (sFacet.equals("盐/n"))
                    // {
                    //     bw.write(iterEmoArray.get(i));
                    //     bw.newLine();
                    //     bw.flush();
                    // }
                    authScoreArray = authScoreArray + centerScoreLast.get(iterEmoArray.get(i));
                    authSum = authSum + centerScoreLast.get(iterEmoArray.get(i));
                    centerScoreArray = centerScoreArray + authScoreLast.get(iterEmoArray.get(i));
                    centerSum = centerSum + authScoreLast.get(iterEmoArray.get(i));
                }
                authScore.put(sFacet, authScoreArray);
                centerScore.put(sFacet, centerScoreArray);
                if (authScoreArray > authMax) // new normalization
                {
                    authMax = authScoreArray;
                }
                if (centerScoreArray > centerMax)
                {
                    centerMax = centerScoreArray;
                }
            }
            Iterator iteratorEmo = graphEmo.keySet().iterator(); // traverse the emotion side
            while (iteratorEmo.hasNext())
            {
                String sEmo = "";
                sEmo = (String) iteratorEmo.next();
                ArrayList<String> iterFacetArray = new ArrayList<String>(graphEmo.get(sEmo));
                Double authScoreArray = .0;
                Double centerScoreArray = .0;
                for (int i = 0; i < iterFacetArray.size(); i++)
                /*
                 * a node's hub score is the sum of its neighbours' authority scores
                 * a node's authority score is the sum of its neighbours' hub scores
                 */
                {
                    authScoreArray = authScoreArray + centerScoreLast.get(iterFacetArray.get(i));
                    authSum = authSum + centerScoreLast.get(iterFacetArray.get(i));
                    centerScoreArray = centerScoreArray + authScoreLast.get(iterFacetArray.get(i));
                    centerSum = centerSum + authScoreLast.get(iterFacetArray.get(i));
                }
                authScore.put(sEmo, authScoreArray);
                centerScore.put(sEmo, centerScoreArray);
                if (authScoreArray > authMax) // new normalization
                {
                    authMax = authScoreArray;
                }
                if (centerScoreArray > centerMax)
                {
                    centerMax = centerScoreArray;
                }
            }
            /*
             * normalization
             */
            Iterator iteratorAuthNorm = authScore.keySet().iterator();
            while (iteratorAuthNorm.hasNext())
            {
                String itFacet = (String) iteratorAuthNorm.next();
                // Double authScoreNorm = authScore.get(itFacet) / authSum;
                Double authScoreNorm = authScore.get(itFacet) / authMax; // new normalization
                authScore.put(itFacet, authScoreNorm);
            }
            Iterator iteratorCenterNorm = centerScore.keySet().iterator();
            while (iteratorCenterNorm.hasNext())
            {
                String itEmo = (String) iteratorCenterNorm.next();
                // Double centerScoreNorm = centerScore.get(itEmo) / centerSum;
                Double centerScoreNorm = centerScore.get(itEmo) / centerMax; // new normalization
                centerScore.put(itEmo, centerScoreNorm);
            }
            /*
             * end of normalization
             */
            Iterator iteratorAuth = authScore.keySet().iterator(); // check every node's scores against the previous pass
            int countJudge = 0;
            while (iteratorAuth.hasNext())
            {
                String word = "";
                word = (String) iteratorAuth.next();
                if (((authScore.get(word) - authScoreLast.get(word) < 0.00001)
                        && (authScore.get(word) - authScoreLast.get(word) > -0.00001))
                        && ((centerScore.get(word) - centerScoreLast.get(word) < 0.00001)
                        && (centerScore.get(word) - centerScoreLast.get(word) > -0.00001)))
                {
                    countJudge++;
                }
            }
            // System.out.println(authScore);
            // System.out.println(authScoreLast);
            // System.out.println("查看迭代终止的另一个条件countjudge" + countJudge);
            if (countJudge == authScore.size())
            {
                iterOver = 1;
            }
            count++;
        }
        /*
         * after iteration: assignment stage, final scoring stage
         */
        System.out.println(graphFacet);
        System.out.println(graphEmo);
    }
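    /*
     * Illustrative sketch, not part of the original listing and not called anywhere:
     * one pass of the update used above, written for a single adjacency map that holds
     * the neighbour lists of every node (the original keeps the two sides of the
     * bipartite graph in graphFacet and graphEmo but applies the same rule to both).
     * Assumes every neighbour already has an entry in auth and hub and that both
     * maxima end up positive.
     */
    private static void hitsStepSketch(HashMap<String, ArrayList<String>> adjacency,
                                       HashMap<String, Double> auth,
                                       HashMap<String, Double> hub)
    {
        HashMap<String, Double> authLast = new HashMap<String, Double>(auth); // copies, not references
        HashMap<String, Double> hubLast = new HashMap<String, Double>(hub);
        double authMax = .0;
        double hubMax = .0;
        for (String node : adjacency.keySet())
        {
            double a = .0;
            double h = .0;
            for (String neighbour : adjacency.get(node))
            {
                a = a + hubLast.get(neighbour);  // authority: sum of the neighbours' hub scores
                h = h + authLast.get(neighbour); // hub: sum of the neighbours' authority scores
            }
            auth.put(node, a);
            hub.put(node, h);
            authMax = Math.max(authMax, a);
            hubMax = Math.max(hubMax, h);
        }
        for (String node : adjacency.keySet()) // max-normalization, as in the "new normalization" above
        {
            auth.put(node, auth.get(node) / authMax);
            hub.put(node, hub.get(node) / hubMax);
        }
    }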

    public void finalScore()
    {
        Iterator iteFacet = graphFacet.keySet().iterator(); // multiply each score by the (log) frequency of the word
        while (iteFacet.hasNext())
        {
            String facet = "";
            facet = (String) iteFacet.next();
            double scoreFA = .0;
            double scoreFC = .0;
            double timeF = Math.log10(wordLex.get(facet));
            scoreFA = authScore.get(facet) * timeF;
            facetScoreA.put(facet, scoreFA);
            scoreFC = centerScore.get(facet) * timeF;
            facetScoreC.put(facet, scoreFC);
        }
        Iterator iteEmo = graphEmo.keySet().iterator();
        while (iteEmo.hasNext())
        {
            String emo = "";
            emo = (String) iteEmo.next();
            double scoreEA = .0;
            double scoreEC = .0;
            double timeE = Math.log10(wordLex.get(emo));
            scoreEA = authScore.get(emo) * timeE;
            emoScoreA.put(emo, scoreEA);
            scoreEC = centerScore.get(emo) * timeE;
            emoScoreC.put(emo, scoreEC);
        }
    }
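    /*
     * Hypothetical driver, not part of the original listing: the commented-out calls in
     * the constructor suggest this order of use. It assumes Lexicon.txt and the corpus
     * path referenced in extract() are available.
     */
    public static void main(String[] args) throws IOException
    {
        HITS hits = new HITS(); // reads the lexicon and runs extract()
        hits.doHITS();          // iterate authority/hub scores until convergence or 1000 passes
        hits.finalScore();      // weight the converged scores by log10 of each word's frequency
        System.out.println(hits.facetScoreA); // facet words with their authority-based final scores
    }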

    public void extract() throws IOException
    {
        System.out.println("开始抽取");
        String inputDataPath = "E:\\日本地震_2\\corpus_sum_split\\sum_split" + data;
        // String inputDataPath = "E:\\日本地震_2\\corpus_sum_split\\sum_splitSum";
        File inputFile = new File(inputDataPath);
        FileReader read = new FileReader(inputFile);
        BufferedReader re = new BufferedReader(read);
        String row = "";
        while ((row = re.readLine()) != null)
        {
            int emoSig = 0;
            int emoEmoCountRow = 0;
            int emoNounCountRow = 0;
            System.out.println("处理第" + rowNumber + "行");
            // System.out.println(row);
            ArrayList[] sentenceInfo = new ArrayList[50];
            // information for one sentence; each ArrayList holds one clause, i.e. one comma-separated segment
            for (int i = 0; i < 50; i++)
            {
                sentenceInfo[i] = new ArrayList();
            }
            int count = 0;
            int continueNoun = 0; // records whether other words follow a noun; if so, the "的" case needs no special handling
            String[] words = new String[100];
            words = row.split(" ");
            for (int i = 0; i < words.length; i++)
            /*
             * walk over every token and add it to the sentence information
             */
            {
                if (words[i].indexOf("/nl") != -1 || words[i].indexOf("/nr") != -1
                        || words[i].indexOf("/nrf") != -1 || words[i].indexOf("/nrj") != -1
                        || words[i].indexOf("/nr1") != -1 || words[i].indexOf("/nr2") != -1
                        || words[i].indexOf("/nsf") != -1 || words[i].indexOf("/ns") != -1
                        || words[i].indexOf("/nt") != -1 || words[i].indexOf("/nz") != -1
                        || words[i].indexOf("/n") != -1)
                /*
                 * /ng is not wanted
                 */
                {
                    if (words[i] != null)
                    // System.out.println(words[i]);
                    {
                        if (words[i].substring(0, words[i].indexOf("/")).matches("[\u4e00-\u9fa5]+")
                                || words[i].substring(0, words[i].indexOf("/")).matches("[a-zA-Z]+")
                                || words[i].substring(0, words[i].indexOf("/")).matches("[0-9]+"))
                        {
                            // if (rowNumber > 11183)
                            // {
                            //     System.out.println(words[i]);
                            //     System.out.println(row);
                            //     System.out.println(count);
                            //     Sy
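The visible part of extract() keeps a token only if its part-of-speech suffix (the /n... tags attached by the segmenter) marks a noun and its surface form is purely Chinese, alphabetic, or numeric. As a minimal standalone sketch of that filter (the helper name isCandidateNoun is illustrative and not in the original, and the startsWith test condenses the chain of indexOf checks above):

// Sketch only: condenses the noun-tag and surface-form checks used in extract().
static boolean isCandidateNoun(String token)
{
    int slash = token.indexOf('/');
    if (slash <= 0)
    {
        return false; // no POS tag attached
    }
    String surface = token.substring(0, slash);
    String tag = token.substring(slash); // e.g. "/n", "/ns", "/nt"
    boolean nounTag = tag.startsWith("/n") && !tag.startsWith("/ng"); // /ng is excluded in the original
    boolean cleanSurface = surface.matches("[\u4e00-\u9fa5]+")
            || surface.matches("[a-zA-Z]+")
            || surface.matches("[0-9]+");
    return nounTag && cleanSurface;
}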
