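1、数据结构代码相似度检测

思路:首先,对要进行比较的所有代码段进行词法分析,并按照自行制定的转换规则将其转化为特定的标记(token)串。接着,通过两两比较标记(token)串来确定代码之间的相似性,并由此确定代码之间抄袭的程度,过程如图2.1所示。

图2.1 源代码复制检测过程示意图

将两段代码分别转换为token串后,基于RKR-GST(Running-Karp-Rabin Greedy-String-Tiling)算法的思想,循环求取两个标记串中未被匹配部分的最大公共子串,并将其用空格代替,使其不再参与后续匹配,最后根据如下公式求出两个token串A、B的相似度:

$$\mathrm{sim}(A,B)=\frac{2\sum_{i}\lvert m_i\rvert}{\lvert A\rvert+\lvert B\rvert}\tag{2.3}$$

其中 $m_i$ 为第 $i$ 次匹配得到的最大公共子串,$\lvert m_i\rvert$、$\lvert A\rvert$、$\lvert B\rvert$ 分别为相应串所含token的个数。(原式在文本提取时丢失,此处按RKR-GST算法的通行定义给出,请与原文核对。)

过程论述

流程图

首先要对整个设计做一个整体规划,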
2、即设计一个流程图,如图3.1所示。

图3.1 程序流程图

程序清单

#include #include #include #include #include #include #include #define N 10000#define M 10000#define MAXSTRLEN 10000 /定义最大串长typedef int status;typedef unsigned char SStringMAXSTRLEN+1; /串的定长顺序存储表示SString a3=int,long,short;SString b2=float,double;SString c15=&,|,+,-,+,-,*
3、,/,=,=,;SString d12=,(,),;,#,;,.;SString e29=auto,break,case,char,const,continue,default,do,else,enum, extern,for,goto,if,main,printf,register,return,signed,sizeof, static,struct,switch,typedef,union,unsigned,void,while,volatile;HANDLE hOut;DWORD written;void ShadowWindowLine(char *str);char type(ch
4、ar *str);void token(char name,char list,char token,FILE *table);void simple(int MinMatchLen,FILE *fp1,FILE *fp2);status replace(SString s,int pos,int len,int Ls);int copy(float n);void ShadowWindowLine(char *str)SMALL_RECT rc;CONSOLE_SCREEN_BUFFER_INFO bInfo; / 窗口缓冲区信息WORD att0,att1,attBack;int i, c
5、hNum = strlen(str);GetConsoleScreenBufferInfo( hOut, &bInfo ); / 获取窗口缓冲区信息/ 计算显示窗口大小和位置rc.Left = (bInfo.dwSize.X - chNum)/2 - 2;rc.Top = 8; / 原代码段中此处为bInfo.dwSize.Y/2 - 2,但是如果您的DOS屏幕有垂直滚动条的话,还需要把滚动条下拉才能看到,为了方便就把它改为10rc.Right = rc.Left + chNum + 4;rc.Bottom = rc.Top + 4;att0 = BACKGROUND_RED |BACKGRO
6、UND_BLUE; / 阴影属性att1 = FOREGROUND_RED |FOREGROUND_GREEN |FOREGROUND_BLUE | FOREGROUND_INTENSITY | BACKGROUND_RED | BACKGROUND_BLUE | BACKGROUND_INTENSITY;/ 文本属性attBack = BACKGROUND_RED |BACKGROUND_GREEN |BACKGROUND_BLUE | BACKGROUND_INTENSITY; / 背景属性/ 设置阴影然后填充COORD posShadow = rc.Left+1, rc.Top+1, p
7、osText = rc.Left, rc.Top,posBack=0,0;for (i=0;i25;i+)FillConsoleOutputAttribute(hOut, attBack,80, posBack, &written);posBack.Y+;for (i=0; i5; i+)FillConsoleOutputAttribute(hOut, att0, chNum + 4, posShadow, &written);posShadow.Y+;for (i=0;i5;i+)FillConsoleOutputAttribute(hOut, att1,chNum + 4, posText
8、, &written);posText.Y+;/ 写文本和边框posText.X = rc.Left + 2;posText.Y = rc.Top + 2;WriteConsoleOutputCharacter(hOut, str, strlen(str), posText, &written);SetConsoleTextAttribute(hOut, bInfo.wAttributes); / 恢复原来的属性char type(char *str) /此函数判断单词类型int i;for(i=0;i3;i+) /a中的关键字if(strcmp(str,ai)=0)return K;for(
9、i=0;i2;i+) /b中的关键字if(strcmp(str,bi)=0)return E;for(i=0;i15;i+) /c中的符号if(strcmp(str,ci)=0)return A;for(i=0;i12;i+) /d中符号if(strcmp(str,di)=0)return R;for(i=0;i29;i+) /e中的关键字if(strcmp(str,ei)=0)return Y;if(isdigit(str0) /09是数字return N;/一般的变量与字符if(!isalnum(str0)return H;else return C;/变量void token(char
10、name,char list,char token,FILE *table) /将两个文件中的字符串分别切割转换为token串FILE *in,*out; char ch,c,bufferN,*linkM;int i=0,j=0,k=0,LenLink=0;if(in=fopen(name,r+)=NULL)printf(源文件无法打开!n);exit(0);if(out=fopen(list,w+)=NULL)printf(文件写入失败!n);exit(0);if(table=fopen(token,w+)=NULL)printf(文件写入失败!n);exit(0);while(!feof(in) /逐字读取文件ch=fgetc(in);if(ch=t | ch= | ch= n) /去掉空格、制表符、回车
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1