【问题标题】:Syntax error while parsing file using flex and bison(使用 flex 和 bison 解析文件时出现语法错误)
【发布时间】:2016-02-11 09:06:03
【问题描述】:

我正在解析以下文件:

BEGIN BLOCK BLK_ROWDEC            
        NAME                          cell_rowdec
        SIZE                          UNI_rowdecSize
        ITERATE                       itr_rows
        DIRECTION                     lgDir_rowdec
        STRAP                         STRD1,STRD3,STRD2
        WRAP                          WRD1
        VIA                           VIAB,VIAC,VIAD
ENDS BLK_ROWDEC

我的flex和bison文件如下:

lexa.l

%{
#include <iostream>
#include <stdio.h>
const char s[2] = " ";
#include "yacc.tab.h"
char *token;
#define YY_DECL extern "C" int yylex()
int line_num = 1;



using namespace std;
%}

DOT             "."
COLON           ":"
SEMICOLON       ";"
COMMA           ","
ANGLE_LEFT      "<"
ANGLE_RIGHT     ">"
AT              "@"
EQUAL           "="
SQUARE_OPEN     "["
SQUARE_CLOSE    [^\\]"]"
OPENBRACE       "\("
CLOSEBRACE      "\)"
QUOTE           "\""
QUOTE_OPEN      "\""
QUOTE_CLOSE     [^\\]"\""
SPACE           " "
TAB             "\t"
CRLF            "\r\n"
QUOTED_PAIR     "\\"[^\r\n]
DIGIT           [0-9]
ALPHA           [a-zA-Z]
QTEXT           [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]


%%



[ \t]         ;


^BEGIN(.*)\r?\n+\s*BEGIN(.*)     {   printf("\nError : two continous BEGIN is not allowed : "); }

^ENDS(.*)\r?\n+\s*ENDS(.*)   {   printf("\nError : two continous END is not allowed : \n"); }




NAME          {  yylval.sval = strdup(yytext);
    return TOK_NAME; }

SIZE          { yylval.sval = strdup(yytext);
      return TOK_SIZE; }

ITERATE       { yylval.sval = strdup(yytext);
       return TOK_ITERATE; }

DIRECTION   { yylval.sval = strdup(yytext);
      return TOK_DIRECTION; }

STRAP       { yylval.sval = strdup(yytext);
      return TOK_STRAP; }

WRAP        { yylval.sval = strdup(yytext);
      return TOK_WRAP; }

VIA     { yylval.sval = strdup(yytext);
      return TOK_VIA; }

ENDS        { yylval.sval = strdup(yytext);
      return TOK_END; }

BEGIN       { yylval.sval = strdup(yytext);
      return TOK_BEGIN; }

BLOCK       { yylval.sval = strdup(yytext);
      return TOK_BLOCK; }




[a-zA-Z0-9_,]+    { yylval.sval = strdup(yytext);
      return TOK_STRING; }


{SPACE}*          { /* a run of space characters is reported to the parser as TOK_SPACE */ return TOK_SPACE; }




^ENDS(.*)$        {/* a complete line beginning with ENDS is matched here and discarded without returning a token; flex prefers the longest match, so this wins over the 4-character ENDS rule above */}


^{CRLF}                         { /* CRLF at the very start of a line, i.e. an empty line */ return TOK_EMPTY_LINE; }
{CRLF}                          {/* any other CRLF is dropped */}
.                               {}/* ignore unknown chars */
\n                      { /* count the line and report end-of-line to the parser */ ++line_num; return ENDL; }

yacca.y

%{
#include <cstdio> 
#include <cstring>
#include <iostream>
#include <stdio.h>

#define YYDEBUG 1

using namespace std;

extern "C" int yylex();
extern "C" FILE *yyin;
extern int line_num;


void yyerror(const char* s);
%}

// Symbols.
%union
{
    char* sval;
};

%token <sval> TOK_NAME
%token <sval> TOK_SIZE
%token <sval> TOK_STRING
%token <sval> TOK_ITERATE
%token <sval> TOK_DIRECTION
%token <sval> TOK_STRAP
%token <sval> TOK_WRAP
%token <sval> TOK_VIA
%token <sval> TOK_EMPTY_LINE 
%token <sval> TOK_BLOCK
%token <sval> TOK_LINE
%token <sval> TOK_BEGIN
%token <sval> TOK_END
%token TOK_SPACE
%token END ENDL



%%

language : program ;


program : block
| program block
;

/* One block: BEGIN BLOCK, a TOK_SPACE, the block name, one or more items,
   then ENDS, a TOK_SPACE and a name. Note that no TOK_SPACE is expected
   between TOK_BEGIN and TOK_BLOCK. The action prints the opening name ($4);
   the closing name ($8) is not compared with it. */
block   : TOK_BEGIN TOK_BLOCK TOK_SPACE TOK_STRING blockcontents TOK_END TOK_SPACE TOK_STRING 
  {
    cout << endl << "SHAILAVI" << $4 << " ";

  }
;


blockcontents : item
      | blockcontents item
      ;


/* One "KEYWORD value" entry inside a block. Every alternative expects a
   TOK_SPACE before the keyword and another one between keyword and value;
   each action prints the keyword text ($2) and the value text ($4). */
item    :   TOK_SPACE TOK_NAME TOK_SPACE TOK_STRING         
    { 
        cout << endl << "Value:" << $2 << "->" << $4 << "  "; 
    }
    | TOK_SPACE TOK_SIZE TOK_SPACE TOK_STRING        { cout << $2 << "->" << $4 << "  "; }
    | TOK_SPACE TOK_ITERATE TOK_SPACE TOK_STRING     { cout << $2 << "->" << $4 << "  ";  }
    | TOK_SPACE TOK_DIRECTION TOK_SPACE TOK_STRING   { cout << $2 << "->" << $4 << "  " << endl; }
    | TOK_SPACE TOK_STRAP TOK_SPACE TOK_STRING       { cout << "ref:" << $2 << "->" << $4 << "  "; }
    | TOK_SPACE TOK_WRAP TOK_SPACE TOK_STRING    { cout << $2 << "->" << $4 << "  "; }
    | TOK_SPACE TOK_VIA TOK_SPACE TOK_STRING     { cout << $2 << "->" << $4 << "  " << endl; }
    ;



%%



// Entry point: opens the input named "file" in the working directory, hands
// it to the scanner through yyin and runs the parser with tracing switched on.
// Returns -1 when the file cannot be opened.
int main(void) {
    FILE * pt = fopen("file", "r" );
    if(!pt)
    {
    cout << "Bad Input.Noexistant file" << endl;
    return -1;
    }
    yyin = pt;
    do
    {
    yydebug = 1;   // parser trace; YYDEBUG is defined as 1 in the prologue
        yyparse();
    }while (!feof(yyin));      // yyparse() is called again until the stream reports end-of-file
}
// Error callback used by the parser: prints the current value of line_num
// together with the message s, then terminates the process with exit(-1),
// so the parser never gets a chance to recover.
void yyerror(const char *s) {
    cout << "parse error on line " << line_num << "!  Message: " << s << endl;
    exit(-1);
}


#include "lex.yy.c"

编译步骤:

flex lexa.l
bison -d yacca.y
g++ yacca.tab.c -lfl -o scanner.exe

执行时,它会在 blockcontents 附近报出 syntax error。

请帮我找出我犯的错误。

非常感谢。

【问题讨论】:

    标签: parsing block flex-lexer bisonc++


    【解决方案1】:

    我花了一段时间,但我发现了缺陷。

    在您的词法分析器中,您会跳过所有的制表符和空格序列(第一条规则)。 但是您的解析器时不时地需要空白。因此语法错误。

    由于您并不需要对空格做任何处理,只需在词法分析器中把它们吃掉(实际上您现在已经这样做了,但最好同时删除 {SPACE}* 规则),并删除解析器中的 TOK_SPACE。

    ---- 编辑给出一些提示----

    我为追踪该错误所做的是:

    1. 让词法分析器输出调试信息。我添加了下面的宏(省略了井号;由于某种原因它会扰乱渲染器):
    
             #ifdef DEBUG
             #define RETURN(x)       cerr << "\n--> found " << #x << "\n"; return x;
             #else
             #define RETURN(x)       return x;
             #endif
    

    并将所有“return something”替换为 RETURN(something)

    2. 我分别编译 bison/flex 生成的文件,然后再把它们链接起来:
    
        flex lexa.l && \
        bison -d yacca.y && \
        g++ -c -DDEBUG -I . lex.yy.c && \
        g++ -c -I . yacca.tab.c && \
        g++ lex.yy.o yacca.tab.o -o scanner
    

    (这里在 linux 上工作)

    3. 应要求,下面给出一个可以工作的示例(解析器):
    
        %{
        #include <cstdio>
        #include <cstring>
        #include <iostream>
        #include <stdio.h>
    
        #define YYDEBUG 1
    
        using namespace std;
    
        extern "C" int yylex();
        extern "C" FILE *yyin;
        extern int line_num;
    
    
        void yyerror(const char* s);
        %}
    
        // Symbols.
        %union
        {
            char* sval;
        };
    
        /* Every token whose text is printed through $n must carry the <sval>
           union member; the lexer stores strdup(yytext) in yylval.sval for them.
           TOK_SPACE is intentionally gone: whitespace is discarded by the lexer. */
        %token <sval> TOK_NAME
        %token <sval> TOK_SIZE
        %token <sval> TOK_STRING
        %token <sval> TOK_ITERATE
        %token <sval> TOK_DIRECTION
        %token <sval> TOK_STRAP
        %token <sval> TOK_WRAP
        %token <sval> TOK_VIA
        %token <sval> TOK_EMPTY_LINE
        %token <sval> TOK_BLOCK
        %token <sval> TOK_LINE
        %token <sval> TOK_BEGIN
        %token <sval> TOK_END
        %token END ENDL
    
        %%
    
        language : program ;
    
    
        program : block
        | program block
        ;
    
        block   : TOK_BEGIN TOK_BLOCK TOK_STRING blockcontents TOK_END TOK_STRING 
          {
            cout << endl << "SHAILAVI" << $3 << " ";
    
          }
        ;
    
    
        blockcontents : item
              | blockcontents item
              ;
    
    
        /* One "KEYWORD value" entry inside a block. Whitespace never reaches the
           parser, so each alternative is just the keyword token followed by the
           value; the action prints keyword text ($1) and value text ($2). */
        item    :   TOK_NAME TOK_STRING         { cout << endl << "Value:" << $1 << "->" << $2 << "  "; }
            | TOK_SIZE TOK_STRING        { cout << $1 << "->" << $2 << "  "; }
            | TOK_ITERATE TOK_STRING     { cout << $1 << "->" << $2 << "  ";  }
            | TOK_DIRECTION TOK_STRING   { cout << $1 << "->" << $2 << "  " << endl; }
            | TOK_STRAP TOK_STRING       { cout << "ref:" << $1 << "->" << $2 << "  "; }
            | TOK_WRAP TOK_STRING    { cout << $1 << "->" << $2 << "  "; }
            | TOK_VIA TOK_STRING     { cout << $1 << "->" << $2 << "  " << endl; }
            ;
    
        %%
    
        // Entry point: opens "./input", feeds it to the scanner through yyin and
        // runs the parser once with tracing enabled.
        // Returns -1 when the file cannot be opened, otherwise yyparse()'s status
        // (0 on success). Syntax errors never return here because yyerror() exits.
        int main(void) {
            FILE * pt = fopen("./input", "r" );
            if(!pt)
            {
                cout << "Bad Input.Nonexistent file" << endl;
                return -1;
            }
            yyin = pt;
            yydebug = 1;
            // The grammar's start symbol already covers a whole sequence of blocks,
            // so a single yyparse() call consumes the input up to end-of-file.
            // Looping on feof(yyin) is unreliable: the scanner reads the stream in
            // buffered chunks, so feof() says nothing about unparsed tokens.
            int status = yyparse();
            fclose(pt);   // the stream was previously left open
            return status;
        }
        void yyerror(const char *s) {
            cout << "parse error on line " << line_num << "!  Message: " << s << endl;
            exit(-1);
        }
        // End-of-input hook for the scanner: always returns 1 (true), meaning
        // there is no further input file to switch to.
        extern "C" int yywrap()
        {
                return 1;
        }
    

    还有词法分析器

    
        %{
        /* Header names restored: <iostream> supplies cerr for the RETURN debug
           macro, <stdio.h> supplies printf used in the rule actions. */
        #include <iostream>
        #include <stdio.h>
        const char s[2] = " ";
        #include "yacca.tab.h"
        char *token;
        #define YY_DECL extern "C" int yylex()
        int line_num = 1;

        /* With -DDEBUG every returned token is logged to stderr first. */
        #ifdef DEBUG
        #define RETURN(x)       cerr << "\n--> found " << #x << "\n"; return x;
        #else
        #define RETURN(x)       return x;
        #endif


        using namespace std;
        %}
    
        /* Named patterns. Only CRLF is referenced by the rules below; the rest
           are kept from the original scanner. */
        DOT             "."
        COLON           ":"
        SEMICOLON       ";"
        COMMA           ","
        ANGLE_LEFT      "<"
        ANGLE_RIGHT     ">"
        AT              "@"
        EQUAL           "="
        SQUARE_OPEN     "["
        SQUARE_CLOSE    [^\\]"]"
        OPENBRACE       "\("
        CLOSEBRACE      "\)"
        QUOTE           "\""
        QUOTE_OPEN      "\""
        QUOTE_CLOSE     [^\\]"\""
        SPACE           " "
        TAB             "\t"
        CRLF            "\r\n"
        QUOTED_PAIR     "\\"[^\r\n]
        DIGIT           [0-9]
        ALPHA           [a-zA-Z]
        QTEXT           [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]
    
    
        /* [ \t]         ; */
        %%
    
        ^BEGIN(.*)\r?\n+[ \t]*BEGIN(.*)     {   /* two BEGIN lines in a row; flex has no \s escape (it is a literal 's'), so the blank class is spelled out */
                                                printf("\nError : two continous BEGIN is not allowed : "); }

        ^ENDS(.*)\r?\n+[ \t]*ENDS(.*)   {   /* two ENDS lines in a row */
                                            printf("\nError : two continous END is not allowed : \n"); }

        NAME          { yylval.sval = strdup(yytext);
              RETURN(TOK_NAME); }

        SIZE          { yylval.sval = strdup(yytext);
              RETURN(TOK_SIZE); }

        ITERATE       { yylval.sval = strdup(yytext);
              RETURN(TOK_ITERATE); }

        DIRECTION   { yylval.sval = strdup(yytext);
              RETURN(TOK_DIRECTION); }

        STRAP       { yylval.sval = strdup(yytext);
              RETURN(TOK_STRAP); }

        WRAP        { yylval.sval = strdup(yytext);
              RETURN(TOK_WRAP); }

        VIA     { yylval.sval = strdup(yytext);
              RETURN(TOK_VIA); }

        ENDS        { /* The former catch-all rule ^ENDS(.*)$ has been removed: flex picks
                         the longest match, so it swallowed the whole "ENDS <name>" line and
                         TOK_END never reached the parser, which then failed at end of input. */
              yylval.sval = strdup(yytext);
              RETURN(TOK_END); }

        BEGIN       { yylval.sval = strdup(yytext);
              RETURN(TOK_BEGIN); }

        BLOCK       { yylval.sval = strdup(yytext);
              RETURN(TOK_BLOCK); }

        [a-zA-Z0-9_,]+    { /* names and comma-separated lists such as STRD1,STRD3,STRD2 */
              yylval.sval = strdup(yytext); RETURN(TOK_STRING); }

        ^{CRLF}                         { /* empty line with a CRLF ending; counted so error line numbers stay right */
                                          ++line_num; RETURN(TOK_EMPTY_LINE); }
        {CRLF}                          { ++line_num; }
        .                               {}/* ignore unknown chars (this is also what discards blanks and tabs) */
        \n                      { ++line_num; /* RETURN(ENDL); */ }
    

    只剩下一个问题了。它真的不喜欢EOF。我会把它留作练习。

    【讨论】:

    • 获得正确的格式有点像 PITA。抱歉,拖了这么久。
    • 谢谢。你帮了我很多@Ronald
    • 问题还没有解决,因为在blockcontents之后它无法读取TOK_END,这对我来说是必要的。
    猜你喜欢
    • 2012-09-21
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2018-12-31
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多