1、HITS算法的二部图实现package extrcting;import java.io.*;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;public class HITS /public ArrayList facet ; public HashMapString, ArrayList graphFacet;/没有必要采用hashset,因为反正都得遍历 public HashMapString, ArrayList graphEm
2、o; public HashMap authScore; /每个节点的中心度 public HashMap centerScore; /每个节点的权威度 public HashMap edgeGraph; /边得信息,暂时没用上 public HashMap wordLex; / 词表,记录词语出现的次数 public HashSet Lexicon; public HashMap facetScoreA; public HashMap facetScoreC; public HashMap emoScoreA; public HashMap emoScoreC; public final S
3、tring data=20110317; public Integer nounCount=0; public Integer emoCount=0; public Integer nounCountEmo=0; public Integer emoCountEmo=0; public int rowNumber;/所有行 public int rowNumberEmo=0;/所有含有情感词的行 public HITS() throws IOException /构造函数 BufferedReader reader = new BufferedReader(new InputStreamRea
4、der(new FileInputStream(Lexicon.txt); String words=; rowNumber=0; Lexicon = new HashSet(5000); authScore= new HashMap(10000); /每个节点的中心度 centerScore = new HashMap (10000); graphFacet = new HashMapString, ArrayList(); graphEmo = new HashMapString, ArrayList(); wordLex = new HashMap(); facetScoreA = ne
5、w HashMap(5000); facetScoreC = new HashMap(5000); emoScoreA = new HashMap(5000); emoScoreC = new HashMap(5000); while(words=reader.readLine()!=null) Lexicon.add(words+/n); extract();/ doHITS();/ finalScore(); / double averageNoun = nounCount/(double)rowNumber;/ double averageEmo = emoCount/(double)r
6、owNumber;/ double averageNounEmo = nounCountEmo/(double)rowNumberEmo;/ double averageEmoEmo = emoCountEmo/(double)rowNumberEmo;/ System.out.println(名词个数+nounCount);/ System.out.println(情感词个数+emoCount);/ System.out.println(平均名词个数+averageNoun);/ System.out.println(平均情感词个数+averageEmo);/ System.out.prin
7、tln(所有行数+rowNumber);/ System.out.println(有情感的行数+rowNumberEmo);/ System.out.println(情感句平均名词个数+averageNounEmo);/ System.out.println(情感句平均情感词个数+averageEmoEmo); public void doHITS() throws IOException int iterOver=0; /代表迭代结束 final int iterTime = 1000; /迭代次数 int count =0; /计数,迭代次数 / File log = new File(E
8、:HITSlog);/日志主要是查看盐的相关信息的/ if(log.exists()/ / log.delete();/ / log.createNewFile();/ FileWriter write = new FileWriter(log);/ BufferedWriter bw = new BufferedWriter(write); int codeNumber = authScore.size(); /while(iterOver!=1) while(iterOver!=1&countiterTime)/迭代 double authMax=.0; double centerMax=
9、.0;/新的归一化算法 double authSum =.0; double centerSum =.0;/用于归一化 HashMap authScoreLast= new HashMap(authScore); /做实验表明,必须new,不new存的是地址 HashMap centerScoreLast = new HashMap(centerScore); System.out.println(第+count+迭代+节点数+codeNumber+个); Iterator iteratorFacet =graphFacet.keySet().iterator();/对属性部分遍历 while
10、(iteratorFacet.hasNext() String sFacet =; sFacet = (String) iteratorFacet.next(); ArrayList iterEmoArray = new ArrayList(graphFacet.get(sFacet); Double authScoreArray=.0; Double centerScoreArray =.0; for(int i=0;iauthMax)/新归一化 authMax = authScoreArray; if(centerScoreArraycenterMax) centerMax = cente
11、rScoreArray; Iterator iteratorEmo =graphEmo.keySet().iterator();/对情感部分遍历 while(iteratorEmo.hasNext() String sEmo =; sEmo = (String) iteratorEmo.next(); ArrayList iterFacetArray = new ArrayList(graphEmo.get(sEmo); Double authScoreArray=.0; Double centerScoreArray =.0; for(int i=0;iauthMax)/新归一化 authM
12、ax = authScoreArray; if(centerScoreArraycenterMax) centerMax = centerScoreArray; /* * 归一化机制 */ Iterator iteratorAuthNorm = authScore.keySet().iterator(); while(iteratorAuthNorm.hasNext() String itFacet= (String)iteratorAuthNorm.next();/ Double authScoreNorm = authScore.get(itFacet)/authSum; Double a
13、uthScoreNorm = authScore.get(itFacet)/authMax; /新归一化 authScore.put(itFacet, authScoreNorm); Iterator iteratorCenterNorm = centerScore.keySet().iterator(); while(iteratorCenterNorm.hasNext() String itEmo = (String)iteratorCenterNorm.next();/ Double centerScoreNorm = centerScore.get(itEmo)/centerSum;
14、Double centerScoreNorm = centerScore.get(itEmo)/centerMax;/新归一化 centerScore.put(itEmo, centerScoreNorm); /* * 归一化机制 */ Iterator iteratorAuth = authScore.keySet().iterator();/遍历中心度 int countJudge=0; while(iteratorAuth.hasNext() String word=; word = (String)iteratorAuth.next(); if(authScore.get(word)-
15、authScoreLast.get(word)-0.00001) &(centerScore.get(word)-centerScoreLast.get(word)-0.00001) countJudge+; /System.out.println(authScore); /System.out.println(authScoreLast); /System.out.println(查看迭代终止的另一个条件countjudge+countJudge); if(countJudge=authScore.size() iterOver=1; count+; /* * 迭代结束后的赋值阶段,最终得分
16、阶段 */ System.out.println(graphFacet); System.out.println(graphEmo); public void finalScore() Iterator iteFacet = graphFacet.keySet().iterator();/把得分乘以出现的次数 while(iteFacet.hasNext() String facet=; facet = (String)iteFacet.next(); double scoreFA=.0; double scoreFC=.0; double timeF = Math.log10(wordLex
17、.get(facet); scoreFA = authScore.get(facet)*timeF; facetScoreA.put(facet, scoreFA); scoreFC = centerScore.get(facet)*timeF; facetScoreC.put(facet, scoreFC); Iterator iteEmo = graphEmo.keySet().iterator(); while(iteEmo.hasNext() String emo =; emo =(String)iteEmo.next(); double scoreEA=.0; double scor
18、eEC=.0; double timeE= Math.log10(wordLex.get(emo); scoreEA = authScore.get(emo)*timeE; emoScoreA.put(emo, scoreEA); scoreEC = authScore.get(emo)*timeE; emoScoreC.put(emo, scoreEC); public void extract() throws IOException System.out.println(开始抽取); String inputDataPath = E:日本地震_2corpus_sum_splitsum_s
19、plit+data;/ String inputDataPath = E:日本地震_2corpus_sum_splitsum_splitSum; File inputFile = new File(inputDataPath); FileReader read = new FileReader(inputFile); BufferedReader re = new BufferedReader(read); String row =; while(row= re.readLine()!=null) int emoSig=0; int emoEmoCountRow=0; int emoNounC
20、ountRow=0; System.out.println(处理第+rowNumber+行); /System.out.println(row); ArrayList sentenceInfo = new ArrayList50; /一句话的信息,每一个Arraylist为子句的信息,就是用,分格出来的字句 for(int i=0;i50;i+) sentenceInfoi= new ArrayList(); int count=0; int continueNoun=0;/记录名词后面是否有其他词,如果有的话,就不考虑“的”的问题了。 String words = new String100; words = row.split( ); for(int i=0; i11183)/ / System.out.println(wordsi);/ System.out.println(row);/ System.out.println(count);/ Sy
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1