词法分析器实验.docx

资源描述

词法分析器实验.docx

《词法分析器实验.docx》由会员分享，可在线阅读，更多相关《词法分析器实验.docx（15页珍藏版）》请在冰豆网上搜索。

词法分析器实验.docx

词法分析器实验

1、名称：

词法分析器的设计与实现

2、实验要求：

设置一个名字表NameL和常数表ConstL，当遇到标识符时，将其字符串送入名字表NameL，并把其名字表地址作为标识符的语义Seman值。

常数情形也一样，不要求翻译成二进制数。

要求在NameL和ConstL表中没有相同元素，同时具有简单的错误处理功能（找出源程序中所有存在的词法错误并指出错误所在的行数）。

试用c语言写一个针对下述单词集的词法分析器。

填写下表：

单词

编码（class）

语义（Seman）

单词

编码（class）

语义（Seman）

空（^）

else

空（^）

for

空（^）

while

空（^）

break

空（^）

return

空（^）

continue

空（^）

float

空（^）

int

空（^）

char

空（^）

标识符

名字表地址

[

空（^）

正整数

常数表地址

]

空（^）

正实数

常数表地址

（

空（^）

零

常数表地址

）

空（^）

{

空（^）

}

空（^）

;

空（^）

换行符

空（^）

文件结束

空（^）

3、测试报告

3.1测试数据：

首先在记事本中输入一个程序（最好存在D盘里方便读取）：

3.2运行后在控制台上得到的结果如图所示：

3.3运行后得到的常数表与标识符表：

4、源代码

#include

#include"stdio.h"

#include"conio.h"

#include"string.h"

#defineMAX100

charin[MAX];//程序

charNameL[100][100];//标识符表

intConstL[100][100];//常数表

FILE*fin,*fout1,*fout2;

structa{

intclass;

charseman[10];

}token[80];

structerr{//记录错误信息的链表结点

intline1;

chardescribe[50];

structerr*next;

};

structerr*table;//错误表头结点和尾结点

structerr*tail;structerr*head;

inti,k=0,m=1,h=1,line=0,low=0,a=1,g=1;//k每个字符串的下标，m,h表中的下标，line,low表中字符串总数，a为0报错，g标示头结点

intline1=1;//记录行号

interrors=0;//记录错误数

intisletter（charx）

{

if（（x>='a'&&x<='z'）||（x>='A'&&x<='Z'））

return1;

elsereturn0;

}

intisdigit（charx）

{

if（x>='0'&&x<='9'）return1;

elsereturn0;

}

/*输出已识别的单词*/

voidprint_token（intk）

{

inti;

printf（"\ntokenlist:

"）;

for（i=0;i

printf（"\n%3dtoken.class:

%3dtoken.seman:

%3s",i+1,token[i].class,token[i].seman）;

}

intnext_token（）

{

charkeyword[100][100]={"if","else","for","while","break","return","continue","float","int","char","printf"};//关键字表

charah,ch,name[10];//ah标识正在进行字符的下一字符，用来判断该标识符是否包含符号

intstate,l,n,j,c=1,x=1,s=1,p=1;//c用来判断是否关键字，x用来判断是否以数字开头，p判断有符号的字符串是不是关键字

ch=in[i];state=0;

while（ch==''||ch=='\t'||ch=='\n'）

{

if（ch=='\n'）{state=37;line1++;break;}

elsech=in[++i];

}

while

（1）

{

switch（state）

{

case0:

if（isletter（ch））{l=0;name[l++]=ch;state=40;getA（ch）;}//标识符

elseif（isdigit（ch））{l=0;name[l++]=ch;state=41;}//常数

elseif（ch=='+'）state=13;

elseif（ch=='-'）state=14;

elseif（ch=='*'）state=15;

elseif（ch=='/'）state=16;

elseif（ch=='%'）state=17;

elseif（ch=='>'）state=18;//接着检查>,>=,后赋值，错误

elseif（ch=='<'）state=20;

elseif（ch=='='）state=28;//接着检查==，=赋值和错误

elseif（ch=='!

'）state=24;

elseif（ch=='&'）state=25;

elseif（ch=='|'）state=26;

elseif（ch==','）state=27;

elseif（ch=='['）state=29;

elseif（ch==']'）state=30;

elseif（ch=='（'）state=31;

elseif（ch=='）'）state=32;

elseif（ch=='{'）state=33;

elseif（ch=='}'）state=34;

elseif（ch==';'）state=35;

elseif（ch=='.'）state=36;

elseif（ch=='\n'）state=37;

elseif（ch=='#'）{i--;return0;}

else{a=0;errA（4）;getA（ch）;return0;}break;

case40:

ch=in[++i];ah=in[i+1];

if（isletter（ch）||isdigit（ch））{state=40;name[l++]=ch;}

elsestate=42;break;

case41:

ch=in[++i];

if（isdigit（ch））{name[l++]=ch;state=41;}

elseif（isletter（ch））

{

if（s）

{

errA

（2）;getA（ch）;a=0;name[l++]=ch;state=41;

}

else{name[l++]=ch;state=41;}

s=0;

}

else

{

if（s）{state=43;}

else{state=42;}

}break;

case42:

name[l]='\0';strcpy（token[k].seman,name）;i--;

//判断该标识符是否为关键字

for（n=0;n<100;n++）

{

if（strcmp（name,keyword[n]）==0）

{

token[k++].class=n+1;

c=0;

break;

}

//判断该标识符是否存在标识符表中

if（c）

{

if（line!

=0）

{

intq=0;

while（q

{

if（strcmp（name,NameL[q++]）==0）

{

token[k].class=11;x=0;

}

if（x）

{

strcpy（NameL[line],name）;

fprintf（fout1,"%s\t（%d）\n",token[k].seman,m++）;

token[k].class=11;

}

line++;k++;

}

return0;

case43:

name[l]='\0';strcpy（token[k].seman,name）;i--;

//判断该常数是否存在常数表中

if（low!

=0）

{

intq=0;

while（q

{

if（strcmp（name,ConstL[q++]）==0）

{

k++;

return0;

}

strcpy（ConstL[low],name）;

fprintf（fout2,"%s\t（%d）\n",token[k].seman,h++）;

token[k].class=12;

low++;k++;return0;

case13:

token[k].class=13;strcpy（token[k].seman,"+"）;k++;return0;

case14:

token[k].class=14;strcpy（token[k].seman,"-"）;k++;return0;

case15:

token[k].class=15;strcpy（token[k].seman,"*"）;k++;return0;

case16:

token[k].class=16;strcpy（token[k].seman,"/"）;k++;return0;

case17:

token[k].class=17;strcpy（token[k].seman,"%"）;k++;return0;

case18:

ch=in[i+1];

if（ch=='='）state=62;

elseif（isletter（ch）||isdigit（ch）||ch==''||ch=='\t'||ch=='\n'）{state=63;}

else{state=63;a=0;errA（5）;getA（ch）;}break;

case62:

token[k].class=19;strcpy（token[k].seman,">="）;k++;i++;return0;

case63:

token[k].class=18;strcpy（token[k].seman,">"）;k++;return0;

case20:

ch=in[i+1];

if（ch=='='）state=21;

elseif（isletter（ch）||isdigit（ch）||ch==''||ch=='\t'||ch=='\n'）{state=64;}

else{state=64;a=0;errA（5）;getA（ch）;}break;

case21:

token[k].class=21;strcpy（token[k].seman,"<="）;k++;i++;return0;

case64:

token[k].class=20;strcpy（token[k].seman,"<"）;k++;return0;

case22:

token[k].class=22;strcpy（token[k].seman,"!

="）;k++;i++;return0;

case28:

ch=in[i+1];

if（ch=='='）{state=61;}

elseif（isletter（ch）||isdigit（ch）||ch==''||ch=='\t'||ch=='\n'）{state=67;}

else{state=67;a=0;errA（5）;getA（ch）;}break;

case61:

token[k].class=23;strcpy（token[k].seman,"=="）;k++;i++;return0;

case67:

token[k].class=28;strcpy（token[k].seman,"="）;k++;return0;

case24:

ch=in[i+1];

if（ch=='='）state=22;

elseif（isletter（ch）||isdigit（ch）||ch==''||ch=='\t'||ch=='\n'）{state=65;}

else{state=65;a=0;errA（5）;getA（ch）;}break;

case65:

token[k].class=24;strcpy（token[k].seman,"!

"）;k++;return0;

case25:

token[k].class=25;ah=in[++i];

if（ah=='&'）{strcpy（token[k].seman,"&&"）;k++;}

else{i--;strcpy（token[k].seman,"&"）;k++;a=0;errA（4）;getA（ch）;}return0;

case26:

token[k].class=26;ah=in[++i];

if（ah=='|'）{strcpy（token[k].seman,"||"）;k++;}

else{i--;strcpy（token[k].seman,"|"）;k++;a=0;errA（4）;getA（ch）;}return0;

case27:

token[k].class=27;strcpy（token[k].seman,","）;k++;return0;

case29:

token[k].class=29;strcpy（token[k].seman,"["）;k++;j=i;

if（ch）

{

ch=in[i++];

if（ch==']'）{i=j;j=0;break;}

}

if（j）{a=0;errA（3）;getA（ch）;i=j;}return0;

case30:

token[k].class=30;strcpy（token[k].seman,"]"）;k++;j=i;

while（ch）

{

ch=in[i++];

if（ch=='['）{i=j;j=0;break;}

}

if（j）{a=0;errA（3）;getA（ch）;i=j;}return0;

case31:

token[k].class=31;strcpy（token[k].seman,"（"）;k++;j=i;

while（ch）

{

ch=in[i++];

if（ch=='）'）{i=j;j=0;break;}

}

if（j）{a=0;errA（3）;getA（ch）;i=j;}return0;

case32:

token[k].class=32;strcpy（token[k].seman,"）"）;k++;return0;

case33:

token[k].class=33;strcpy（token[k].seman,"{"）;k++;

while（ch）

{

ch=in[i++];

if（ch=='}'）{i=j;j=0;break;}

}

if（j）{a=0;errA（3）;getA（ch）;i=j;}return0;

case34:

token[k].class=34;strcpy（token[k].seman,"}"）;k++;return0;

case35:

token[k].class=35;strcpy（token[k].seman,";"）;k++;return0;

case36:

token[k].class=36;strcpy（token[k].seman,"."）;k++;return0;

case37:

token[k].class=37;strcpy（token[k].seman,"换行符"）;k++;return0;

default:

a=0;break;

}

voidgetA（charch）

{

if（ch=='\n'）

{

line1++;

}

while（ch==''||ch=='\t'||ch=='\n'）

{

if（ch=='\n'）

{

line1++;

ch=in[++i];

}

voiderrA（inttype）

{

structerr*newerr=（structerr*）malloc（sizeof（structerr））;

errors++;

newerr->next=NULL;

newerr->line1=line1;

switch（type）

{

//case1:

strcpy（newerr->describe,"变量名要满足不能包括标点符号"）;break;

case2:

strcpy（newerr->describe,"不能以数字开头的数字与字母的字符串"）;break;

case3:

strcpy（newerr->describe,"括号要成对出现"）;break;

case4:

strcpy（newerr->describe,"词法分析不出现该符号"）;break;

case5:

strcpy（newerr->describe,"不满足比较规则"）;break;

}

if（g）

{

head=newerr;

head->next=NULL;

tail=head;

g=0;

}

else

{

tail->next=newerr;

tail=newerr;

tail->next=NULL;

}

intmain（）

{

intl=0;

charinput[100],output1[100],output2[100];

table=（structerr*）malloc（sizeof（structerr））;//定义错误链表结点

table->next=NULL;

tail=table;

printf（"******************c语言实现编译原理词法分析器******************\n\n"）;

//输入文件名的路径和文件名

printf（"请输入词法分析输入的文件名（包括路径）:

"）;

scanf（"%s",input）;

printf（"请输入词法分析输出的标识符表文件名（包括路径）:

"）;

scanf（"%s",output1）;

printf（"请输入词法分析输出的常数表文件名（包括路径）:

"）;

scanf（"%s",output2）;

fin=fopen（input,"r"）;

fout1=fopen（output1,"w"）;

fout2=fopen（output2,"w"）;

if（fin==NULL）

{

printf（"词法分析输入文件有错\n"）;

return

（1）;

}

printf（"\n词法分析结束\n"）;

while（!

feof（fin））

{

in[l++]=fgetc（fin）;

}

in[l]='#';

i=0;

while（in[i]!

='#'）

{

next_token（）;

i++;

}

if（a）

{token[k].class=38;strcpy（token[k].seman,"#"）;

k++;

printf（"\n\nscannerissuccend!

"）;

print_token（k）;

}

else

{

printf（"\n\nscannerisfalse!

"）;

print_token（k）;

}

printf（"\n\n错误总数为%d个\n",errors）;

tail=head;

while（tail）

{

printf（"\n第%d行\t错误为：

%s\n",tail->line1,tail->describe）;

tail=tail->next;

}

fclose（fin）;

fclose（fout1）;

fclose（fout2）;

getch（）;

return0;

}

展开阅读全文