Lex构建词法分析器

under 技术  tag lex    Published on April 15th , 2019 at 11:52 am

Lex是Unix/Linux平台下常用的词法分析程序生成器(flex是它的开源实现),它可以根据词法规则说明书的要求来生成单词识别程序,由该程序识别出输入文本中的各个单词

在Ubuntu下安装及使用Lex的流程为

sudo apt install flex
#编辑lex文件,保存为.l格式,在此设文件名为1.l
flex 1.l
gcc lex.yy.c -o 1 #编译生成的C文件
./1

Lex文件主要由三部分构成

%{
/* Definitions section: C headers, global variables and prototypes used by the actions. */
#include<stdio.h>
int yywrap();
%}
%%
    /*
     * Rules section: each rule is a pattern (a regular expression) followed by
     * an action (C statements). Lex stores the matched text in yytext.
     * This comment is indented on purpose: in the rules section a comment may
     * not start at the beginning of a line, where a pattern is expected.
     */
%%
/*
自定义子程序部分
如果链接时不加-ll选项(flex对应的库为-lfl),则子程序部分必须自行定义main函数和yywrap函数
*/
/* Program entry point: runs the generated scanner by calling yylex().
   Declared as int and returning 0 because standard C requires main to return int. */
int main(void){
    yylex();
    return 0;
}
/* End-of-input hook: returning 1 tells the scanner to stop parsing. */
int yywrap(){
    const int stop_scanning = 1;
    return stop_scanning;
}
/*
int yywrap(void)
在文件(或输入)的末尾被调用;如果返回值是1,就停止解析
因此可以借助它依次解析多个文件:在第三段的yywrap()中
让yyin文件指针指向下一个文件并返回0,直到所有的文件都被解析
最后,yywrap()返回1来表示解析的结束
*/

假设我们要完成一个基础的C词法分析器,要求如下

1: 关键字:
if else int return void while
2: 符号:
+ - * / < <= > >= == != = ; , ( ) [ ] { } /* */
3: ID和NUM:
ID = letter letter*
NUM = digit digit*
letter = a|...|z|A|...|Z
digit = 0|...|9

我的lex实现如下

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Prototype for the yywrap defined in the user-code section at the end of the file. */
int yywrap();
/* Table of the distinct identifiers seen so far; an identifier's index + 1 is the number reported in its ID token. */
char id[50][1005];
/* Echo of the current source line, rebuilt from its lexemes with a single space before each one. */
char tmpLine[100005];
/* Token reports accumulated for the current source line; printed together with tmpLine when the line ends. */
char tmpString[100005];
/* Number of entries of id that are in use. */
int idNum = 0;
/* Number of the source line currently being scanned, starting at 1. */
int lineNum = 1;
%}
%%
else   {
    /* Reserved words: append the lexeme to the line echo, then queue its token report. */
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <reserve_word, else>", lineNum);
    strcat(tmpString, report);
}
if     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <reserve_word, if>", lineNum);
    strcat(tmpString, report);
}
int    {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <reserve_word, int>", lineNum);
    strcat(tmpString, report);
}
return {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <reserve_word, return>", lineNum);
    strcat(tmpString, report);
}
void   {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <reserve_word, void>", lineNum);
    strcat(tmpString, report);
}
while  {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <reserve_word, while>", lineNum);
    strcat(tmpString, report);
}

\/\/[^\n]* {
    /* Line comment: echo it and report it as a COMMENT token. The text is
       formatted straight into the line buffers with snprintf, so a long
       comment cannot overrun a small scratch buffer. */
    size_t used = strlen(tmpLine);
    snprintf(tmpLine + used, sizeof tmpLine - used, " %s", yytext);
    used = strlen(tmpString);
    snprintf(tmpString + used, sizeof tmpString - used, "\n\tline %d: <COMMENT, %s>", lineNum, yytext);
}
"/*"([^\*]|(\*)*[^\*/])*(\*)*"*/" {
    /* Block comment: the match may span several source lines. It is split at
       every newline so that each physical line is echoed, gets its own COMMENT
       token and advances lineNum. Every character is tested for a newline,
       so blank lines inside the comment are counted as well. */
    size_t len = strlen(yytext);   /* computed once; yytext is not modified here */
    size_t start = 0;              /* index where the piece on the current line begins */
    for(size_t q = 0; q <= len; q++){
        if(q < len && yytext[q] != '\n')
            continue;
        /* A piece ends at q, either at a newline or at the end of the match. */
        if(q > start){
            int pieceLen = (int)(q - start);
            size_t used = strlen(tmpLine);
            snprintf(tmpLine + used, sizeof tmpLine - used, " %.*s", pieceLen, yytext + start);
            used = strlen(tmpString);
            snprintf(tmpString + used, sizeof tmpString - used, "\n\tline %d: <COMMENT, %.*s>", lineNum, pieceLen, yytext + start);
        }
        if(q < len){
            /* The newline closes the current source line: print it and start the next one. */
            if(lineNum > 1)
                printf("\n");
            printf("line %d:%s%s", lineNum, tmpLine, tmpString);
            memset(tmpLine, 0, sizeof(tmpLine));
            memset(tmpString , 0, sizeof(tmpString));
            lineNum++;
        }
        start = q + 1;
    }
}
\+     {
    /* Operators and punctuation: append the lexeme to the line echo, then queue its token report. */
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, +>", lineNum);
    strcat(tmpString, report);
}
\-     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, ->", lineNum);
    strcat(tmpString, report);
}
\*     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, *>", lineNum);
    strcat(tmpString, report);
}
\/     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, />", lineNum);
    strcat(tmpString, report);
}
\<     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, <>", lineNum);
    strcat(tmpString, report);
}
\<\=   {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, <=>", lineNum);
    strcat(tmpString, report);
}
\>     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, >>", lineNum);
    strcat(tmpString, report);
}
\>\=   {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, >=>", lineNum);
    strcat(tmpString, report);
}
\=\=   {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, ==>", lineNum);
    strcat(tmpString, report);
}
\!\=   {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, !=>", lineNum);
    strcat(tmpString, report);
}
\=     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, =>", lineNum);
    strcat(tmpString, report);
}
\;     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <;>", lineNum);
    strcat(tmpString, report);
}
\,     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <OP, ,>", lineNum);
    strcat(tmpString, report);
}
\(     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <(>", lineNum);
    strcat(tmpString, report);
}
\)     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <)>", lineNum);
    strcat(tmpString, report);
}
\[     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <[>", lineNum);
    strcat(tmpString, report);
}
\]     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <]>", lineNum);
    strcat(tmpString, report);
}
\{     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <{>", lineNum);
    strcat(tmpString, report);
}
\}     {
    char report[1005];
    strcat(tmpLine, " ");
    strcat(tmpLine, yytext);
    sprintf(report, "\n\tline %d: <}>", lineNum);
    strcat(tmpString, report);
}

[ \t]+ {
    /* Skip blanks and tabs between tokens. Only these two characters are in
       the class: an apostrophe is not whitespace and is left to the error rule. */
    ;
}
[a-zA-Z]+  {
    /* Identifier: look the lexeme up in the id table, register it on first
       sight, and report its 1-based table position as the ID number. */
    const int capacity = (int)(sizeof id / sizeof id[0]);
    int index = 0;
    while(index < idNum && strcmp(yytext, id[index]) != 0)
        index++;
    if(index == idNum){
        /* First occurrence: refuse it instead of writing past the table
           or past the end of a single slot. */
        if(idNum >= capacity || strlen(yytext) >= sizeof id[0]){
            printf("Error in line %d: identifier table overflow\n", lineNum);
            exit(1);
        }
        strcpy(id[idNum++], yytext);
    }
    char tmp[1005];
    strcat(tmpLine, " ");
    sprintf(tmp, "\n\tline %d: <ID, %d>", lineNum, index + 1);
    strcat(tmpLine, yytext);
    strcat(tmpString, tmp);
}

[0-9]+ {
    /* Number: echo the digits and report them as a NUM token. The text is
       formatted straight into the line buffers with snprintf, so a very long
       literal cannot overrun a small scratch buffer. */
    size_t used = strlen(tmpLine);
    snprintf(tmpLine + used, sizeof tmpLine - used, " %s", yytext);
    used = strlen(tmpString);
    snprintf(tmpString + used, sizeof tmpString - used, "\n\tline %d: <NUM, %s>", lineNum, yytext);
}
\n  {
    /* End of a source line: print the echoed line followed by its token
       reports, clear both buffers, and advance to the next line number.
       Every printed line except the first is preceded by a newline. */
    if(1 < lineNum)
        putchar('\n');
    printf("line %d:%s%s", lineNum, tmpLine, tmpString);
    memset(tmpString, 0, sizeof(tmpString));
    memset(tmpLine, 0, sizeof(tmpLine));
    lineNum += 1;
}

[a-zA-Z]+[0-9] |
[0-9]+[a-zA-Z] |
.      {
    /* Lexical error: letters directly followed by a digit, digits directly
       followed by a letter, or any character no other rule accepts. The three
       patterns share this action through the | notation.
       Earlier lines are printed without a trailing newline, so one is emitted
       first to keep the message on a line of its own. */
    if(lineNum > 1)
        printf("\n");
    printf("Error in line %d\n", lineNum);
    exit(1);
}
%%
/* Entry point: runs the scanner, then prints whatever is still buffered for a
   final line that was not terminated by a newline, and ends the output with a
   newline. Declared as int and returning 0 because standard C requires main
   to return int. */
int main(void){
    yylex();
    if(tmpLine[0] != '\0' || tmpString[0] != '\0'){
        /* The newline rule never ran for this line, so print it here. */
        if(lineNum > 1)
            printf("\n");
        printf("line %d:%s%s", lineNum, tmpLine, tmpString);
    }
    printf("\n");
    return 0;
}

/* Always returns 1; this scanner never switches to a further input. */
int yywrap(){
    enum { NO_MORE_INPUT = 1 };
    return NO_MORE_INPUT;
}

本文由 cxh 创作,采用 知识共享署名4.0 国际许可协议进行许可,转载前请务必署名
  文章最后更新时间为:May 6th , 2019 at 05:39 pm
分享到:Twitter  Weibo  Facebook






  1. SkyFireLin

    cxhnb!

    Reply
    1. cxh
      @SkyFireLin

      ql图灵奖nb

      Reply
  2. Fruitking

    Very useful information!

    Reply