1、 sequences = null; * 最大频繁序列 maxFrSeqs = new ArrayList(); * 单项集合 itemList = new ArrayList * 单项序列总和数 private int total = 0; * 最小支持数(默认两个) private int minSup = 0; * 最小限制频繁序列元素数(默认两个) private int minFrElemSize = 0; * 最大限制频繁序列元素数(默认3个) private int maxFrElemSize = 0; public PrefixSpanBuild(List seqs) this
2、(seqs, 2, 2, 3); seqs, int minSup) this(seqs, minSup, 2, 3); seqs, int minSup, int minFrElemSize) this(seqs, minSup, minFrElemSize, 3); seqs, int minSup, int minFrElemSize, int maxFrElemSize) / 最小项集必须小于或等于限制项集数 if (minFrElemSize = maxFrElemSize) this.sequences = seqs; this.minSup = minSup; this.minF
3、rElemSize = minFrElemSize; this.maxFrElemSize = maxFrElemSize; for (List elem : this.sequences) items : elem) for (String item : items) if (!itemList.contains(item) itemList.add(item); total+;计算每个单项的支持数 * return 每个单项的支持数 protected Map countFr1() log.info(开始读取每个单项的支持数); Map supMap = new LinkedHashMap
4、 / sup计算每个单项出现的次数(支持数) Integer sup = 0; Set itemsSet = null; sequences) itemsSet = new HashSet itemsSet.addAll(items); itemsSet) if (itemList.contains(item) if (supMap.containsKey(item) sup = supMap.get(item) + 1; else sup = 1; supMap.put(item, sup); for (IteratorEntry iter = supMap.entrySet().itera
5、tor(); iter.hasNext();) Entry supEntry = (Entry) iter.next(); sup = supEntry.getValue(); if (sup minSup) iter.remove(); total = supMap.size();读取完毕 return supMap; public List replace(List strList, String prefixSeq) List= 0 & pla i - 1) retainList = new ArrayList if (length = 1) retainList.add(_ for (
6、int k = 0; k = pla; k+) retainList.addAll(strList.subList(pla + 1, i); return retainList;temp_s在其投影数据库中查找再次出现他的次数 * param t_num 序列总数 * param temps 序列 * param sd投影数据库 * param sd_count对应的索引 * return int public int makeout(String temps, List sdSeqs) return makeout(new String temps , sdSeqs); * param te
7、mpSeq 序列 * param sdSeqs 投影数据库 public int makeout(String tempSeq, List= 0) tMincout+; break; return tMincout;用PrefixSpan算法求出序列集的频繁序列 protected void prefixSpan(String prefixSeq, List this.maxFrElemSize - 1) return; for (int tTotal = 0; tTotal total; tTotal+) / 第一种情况a的投影数据库seqs,循环整个单项集合ItemList,看是否存在某个
8、item在seqs上还存在频繁单项eg:b int supNum1 = 0; String tempSeq = itemList.get(tTotal); supNum1 = makeout(tempSeq, seqs); if (supNum1 = minSup) / 开始记录频繁序列= this.minFrElemSize - 1) for (int i = 0; i prefixNum; i+) itemList.add(prefixSeqi); itemList.add(tempSeq); / 添加支持数 itemList.add(supNum1 + / 添加置信度 itemList.
9、add(float) supNum1 / seqs.size() + maxFrSeqs.add(itemList); sdSeqs = generateSD(seqs, tempSeq); String prefixSeq2 = new StringprefixNum + 1; for (int e = 0; e int supNum2 = 0; String tempSeq1 = prefixSeqprefixNum - 1 + , + itemList.get(tTotal); String tempSeq1s = tempSeq1.split( supNum2 = makeout(te
10、mpSeq1s, seqs); if (supNum2 = this.minFrElemSize) prefixNum - 1; itemList.add(tempSeq1); itemList.add(supNum2 + itemList.add(float) supNum2 / seqs.size() + sdSeqs = generateSD(seqs, tempSeq1s); String aa = new StringprefixNum; aae = prefixSeqe; aaprefixNum - 1 = tempSeq1; prefixSpan(aa, sdSeqs, pref
11、ixNum); buildPrefixSpan() supMap = this.countFr1(); int times = 0;符合支持度为,项集数为的总item数为, new Integer minSup, minFrElemSize, total ); String itemId = null; for (Entry supEntry : supMap.entrySet() itemId = supEntry.getKey(); / 生成投影数据库 sdList = this.generateSD(itemId); String prefixSeq = itemId ; this.pr
12、efixSpan(prefixSeq, sdList, 1); times+;执行到itemId-,已经循环到, new String prefixSeq0, times + ); return this.maxFrSeqs; public static void main(String args) String sequence = a, a,b,ca,cdc,fa,dd,ee,f, a,bd,fcbega,f ; sLists = new ArrayList e1 = new ArrayList String e = null; items = null; 5; e = sequencei
13、; if (e != null) items = new ArrayList e.split() items.add(item); Collections.sort(items); e1.add(items); e2 = new ArrayList for (int i = 5; 9; e2.add(items); e3 = new ArrayList for (int i = 9; 14; e3.add(items); e4 = new ArrayList for (int i = 14; 20; e4.add(items); sLists.add(e1); sLists.add(e2);
14、sLists.add(e3); sLists.add(e4); PrefixSpanBuild test = new PrefixSpanBuild(sLists, 2, 2); test.buildPrefixSpan(); System.out.println(序列数据库如下: test.sequences) item2s : System.out.print(item2s); System.out.print( System.out.println();执行PrefixSpan算法,生成频繁序列模式结果如下: / System.out.println(tempti); test.prin
15、tMaxFrSeq(); public void printMaxFrSeq() StringBuffer tempStrBuf = null; int seqSize = 0; sequence : maxFrSeqs) tempStrBuf = new StringBuffer(); seqSize = sequence.size(); tempStrBuf.append( seqSize - 3; String skuId = sequence.get(i); tempStrBuf.append(skuId + tempStrBuf.append(sequence.get(seqSize
16、 - 3); - + sequence.get(seqSize - 2); + sequence.get(seqSize - 1); log.info(tempStrBuf.toString(); * 根据前缀生成投影数据库 * param seqs 序列数据库S generateSD(List seqs, String prefixSeq) return generateSD(seqs, new String prefixSeq ); * param prefixSeq 前缀 generateSD(String prefixSeq) return generateSD(sequences, new String prefixSeq ); seqs, String prefixSeq) sdList = new ArrayList retainItems = null; sdElem = null; retainsdElem = null; sdElem = new ArrayList int n = containArra
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1