1、汉字字频统计import java.awt.List;欧阳光明(2021.03.07)import java.io.*;import jxl.*;import jxl.write.*;import java.text.DecimalFormat;import java.util.ArrayList;public class statistics public static void main(String args) / 读字表 ArrayList chtable = readFromTable(CHTable.txt); System.out.println(字表大小为: + chtable
2、.size(); / 读文件 ArrayList numlist = readFromFile(10.txt, chtable); / 排序 ArrayList chlist = sort(chtable, numlist); / 计算汉字的总数 int sum = 0; for (int i = 0; i numlist.size(); i+) sum = sum + (Integer) numlist.get(i); System.out.println(-显示结果-); / 返回指定个数的汉字频率统计结果 ArrayList freqlist = frequency(chlist, nu
3、mlist, sum, 100); / 计算熵值 float sh = entropy(freqlist); / 计算指定个汉字的字频总和 float fre1 = freqSum(freqlist, 1); float fre2 = freqSum(freqlist, 20); float fre3 = freqSum(freqlist, 100); float fre4 = freqSum(freqlist, 600); float fre5 = freqSum(freqlist, 2000); float fre6 = freqSum(freqlist, 3000); float fre
4、7 = freqSum(freqlist, 6000); ArrayList freal = new ArrayList(); freal.add(fre1); freal.add(fre2); freal.add(fre3); freal.add(fre4); freal.add(fre5); freal.add(fre6); freal.add(fre7); ArrayList nal = new ArrayList(); nal.add(1); nal.add(20); nal.add(100); nal.add(600); nal.add(2000); nal.add(3000); n
5、al.add(6000); System.out.println(-程序结束-); / 生成Excel的类 try / 打开文件 WritableWorkbook book = Workbook.createWorkbook(new File(统计结果.xls); / 生成工作表,参数0表示这是第一页 WritableSheet sheet = book.createSheet(sum+字, 0); /* * 生成一个保存数字的单元格 必须使用Number的完整包路径,否则有语法歧义 */ /表头 Label label1 = new Label(0, 0, 字符); sheet.addCel
6、l(label1); Label label2 = new Label(1, 0, 频率); sheet.addCell(label2); for(int i=0;i100;i+) / 中文字符 Label label = new Label(0, i+1, chlist.get(i).toString(); sheet.addCell(label); / 出现的频率 jxl.write.Number number = new jxl.write.Number(1, i+1, (Float)freqlist.get(i); sheet.addCell(number); /写入熵值 Label
7、lsh = new Label(0, 101, 熵值); sheet.addCell(lsh); jxl.write.Number nsh = new jxl.write.Number(1, 101, sh); sheet.addCell(nsh); /写入字频总和 for(int i=0;i= u4e00 & tempint = uf900 & tempint = ufa2d) char tempchar = (char) tempint; / System.out.println(tempchar); / System.out.println(list.size: + chlist.siz
8、e(); / 判断该字符是否出现过 int i = 0; for (i = 0; i chlist.size(); i+) / 一旦重复,跳出循环 char c = ; Object ob = chlist.get(i); if (ob instanceof Character) c = (Character) ob; / System.out.println(c: + c); if (tempchar = c) / System.out.println(重复!); break; / 字符从未出现过 if (i = chlist.size() / System.out.println(新字符!
9、); chlist.add(tempchar); reader.close(); catch (Exception e) e.printStackTrace(); return chlist; /* * 该函数用于从文件中读取中文字符,并返回它出现的次数 * * param filename * return */ public static ArrayList readFromFile(String filename, ArrayList chtable) File file = new File(filename); Reader reader = null; ArrayList numl
10、ist = new ArrayList(); / 初始化字符出现的次数集合 for (int i = 0; i = u4e00 & tempint = uf900 & tempint = ufa2d) char tempchar = (char) tempint; / System.out.println(tempchar); / System.out.println(list.size: + chlist.size(); / 判断该字符是否在字表里 int i = 0; for (i = 0; i chtable.size(); i+) / 在字表里,统计重复次数并跳出循环 char c =
11、 ; Object ob = chtable.get(i); if (ob instanceof Character) c = (Character) ob; / System.out.println(c: + c); if (tempchar = c) int num = (Integer) numlist.get(i) + 1; numlist.set(i, num); break; reader.close(); catch (Exception e) e.printStackTrace(); return numlist; /* * 该函数用来对汉字出现的次数进行从大到小的排序,返回排
12、序结果 * * param chlist * param numlist */ public static ArrayList sort(ArrayList chtable, ArrayList numlist) ArrayList chlist = chtable; for (int i = 0; i numlist.size(); i+) for (int j = i + 1; j numlist.size(); j+) int listi = (Integer) numlist.get(i); int listj = (Integer) numlist.get(j); if (listi
13、 listj) numlist.set(i, listj); numlist.set(j, listi); char chi = (Character) chlist.get(i); char chj = (Character) chlist.get(j); chlist.set(i, chj); chlist.set(j, chi); return chlist; /* * 该函数用来计算各个汉字出现的频率,并且显示出指定个数的结果 * * param chlist * param numlist * param sum * param count */ public static Arra
14、yList frequency(ArrayList chlist, ArrayList numlist, int sum, int count) ArrayList freqlist = new ArrayList(); / 计算频率 for (int j = 0; j chlist.size(); j+) float freq = (Integer) numlist.get(j) / (float) sum; freqlist.add(freq); / 按指定格式输出(保留6位有效数字) for (int j = 0; j freqlist.size() & j count; j+) Sys
15、tem.out.println(字符: + chlist.get(j); System.out.println(出现次数: + numlist.get(j); System.out.println(频率: + freqlist.get(j); System.out.println(-); System.out.println(中文字符总数: + sum); return freqlist; /* * 该函数用来计算熵值 * * param freqlist */ public static float entropy(ArrayList freqlist) float sum = 0f; for (int i = 0; i freqlist.size() return 0f; for (int i = 0; i count; i+) freqsum += (Float) freqlist.get(i); System.out.println(前 + count + 个汉字字频总和为: + freqsum); return freqsum;
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1