Search code examples
bisonflex-lexerspacebisonc++

Ignore white space in flex and bison


I am trying to parse following lines:

BEGIN WRAP WIO3     
    NAME                            lgCell_prg160_io
    CONDITION                       UNI_PLACE_GLOBAL && compBits
ENDS WIO3       

The grammar I am using parses the lines above up to UNI_PLACE_GLOBAL and then reports a parse error.

Please help me identify the mistake I have made.

I think it does not accept the spaces, so how can I make it parse these lines?

I have done

lex.l

%{
/* Scanner prologue: copied verbatim into the generated scanner source. */
#include <iostream>
#include <stdio.h>
/* NOTE(review): `s` is not referenced by any rule shown in this file. */
const char s[2] = " ";
/* Token codes and the yylval union generated by bison. */
#include "yacca.tab.h"
/* NOTE(review): `token` is not referenced by any rule shown in this file. */
char *token;
/* Give yylex C linkage so it matches the `extern "C" int yylex();`
   declaration in the grammar file. */
#define YY_DECL extern "C" int yylex()
/* Current input line; incremented by the "\n" rule and read by yyerror(). */
int line_num = 1;

/* RETURN(x): return token x; when DEBUG is defined, first print the token
   name to cerr. The DEBUG form expands to two statements, so it must not be
   used as the body of an unbraced if/else. The only mention in the rules
   below is inside a comment on the newline rule. */
#ifdef DEBUG
#define RETURN(x)       cerr << "\n--> found " << #x << "\n"; return x;
#else
#define RETURN(x)       return x;
#endif


using namespace std;
%}

DOT             "."
COLON           ":"
SEMICOLON       ";"
COMMA           ","
ANGLE_LEFT      "<"
ANGLE_RIGHT     ">"
AT              "@"
EQUAL           "="
SQUARE_OPEN     "["
SQUARE_CLOSE    [^\\]"]"
OPENBRACE       "\("
CLOSEBRACE      "\)"
QUOTE           "\""
QUOTE_OPEN      "\""
QUOTE_CLOSE     [^\\]"\""
SPACE           " "
TAB             "\t"
CRLF            "\r\n"
QUOTED_PAIR     "\\"[^\r\n]
DIGIT           [0-9]
ALPHA           [a-zA-Z]
QTEXT           [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~""]

/* [ \t]         ; */


/* [ \s]         ; */



%%

[ \t]       ;

"//".*      /* ignore comments */

NAME          {  yylval.sval = strdup(yytext);
    return TOK_NAME; }

SIZE          { yylval.sval = strdup(yytext);
      return TOK_SIZE; }

ITERATE       { yylval.sval = strdup(yytext);
       return TOK_ITERATE; }

DIRECTION   { yylval.sval = strdup(yytext);
      return TOK_DIRECTION; }

STRAP       { yylval.sval = strdup(yytext);
      return TOK_STRAP; }

WRAP        { yylval.sval = strdup(yytext);
      return TOK_WRAP; }

VIA     { yylval.sval = strdup(yytext);
      return TOK_VIA; }

ENDS        { yylval.sval = strdup(yytext);
      return TOK_END; }

BEGIN       { yylval.sval = strdup(yytext);
      return TOK_BEGIN; }

BLOCK       { yylval.sval = strdup(yytext);
      return TOK_BLOCK; }

VERSION     { yylval.sval = strdup(yytext);
      return TOK_VERSION; }


STRUCTURE       { yylval.sval = strdup(yytext);
              return TOK_STRUCT; }

UNIQUE          { yylval.sval = strdup(yytext);
              return TOK_UNIQUE; }


REF         { yylval.sval = strdup(yytext);
              return TOK_REF; }

POSITION            { yylval.sval = strdup(yytext);
                  return TOK_POS; }

CONDITION           { yylval.sval = strdup(yytext);
                  return TOK_CON; }

ORIENTATION         { yylval.sval = strdup(yytext);
                  return TOK_ORI; }

{QTEXT}*        { yylval.sval = strdup(yytext);
          return TOK_STRING; } 


\\\n            { printf("c> "); }      /* ignore line continuation */


^{CRLF}                         { return TOK_EMPTY_LINE; }
{CRLF}                          {}
.                               {}/* ignore unknown chars */
\n                      { ++line_num; /*RETURN(ENDL); */ }

yacc.y

%{
#include <cstdio> 
#include <cstring>
#include <iostream>
#include <stdio.h>

#define YYDEBUG 1

using namespace std;

extern "C" int yylex();
extern "C" FILE *yyin;
extern int line_num;


void yyerror(const char* s);
%}

// Symbols.
%union
{
    char* sval;
};

%token <sval> TOK_NAME
%token <sval> TOK_SIZE
%token <sval> TOK_STRING
%token <sval> TOK_ITERATE
%token <sval> TOK_DIRECTION
%token <sval> TOK_STRAP
%token <sval> TOK_WRAP
%token <sval> TOK_VIA
%token <sval> TOK_EMPTY_LINE 
%token <sval> TOK_BLOCK
%token <sval> TOK_LINE
%token <sval> TOK_BEGIN
%token <sval> TOK_END
%token <sval> TOK_VERSION
%token <sval> TOK_STRUCT
%token <sval> TOK_UNIQUE
%token <sval> TOK_REF
%token <sval> TOK_POS
%token <sval> TOK_CON
%token <sval> TOK_ORI
%token END ENDL




%%

language : program ;

program : block
| program block
;


/* block: a BEGIN ... ENDS section.
 * The general shape is  BEGIN <type> <name> <contents> ENDS <name>.
 * For the BLOCK, STRUCTURE and STRAP types the action compares the name after
 * BEGIN ($3) with the name after ENDS ($6): on a match it prints the type
 * keyword as HEADER and the name as ID, otherwise it prints the
 * "not able to find its END" message.
 */
block   : TOK_BEGIN TOK_BLOCK TOK_STRING blockcontents TOK_END TOK_STRING 
  {
    if (strcmp($3,$6) == 0 )
    {
        printf("\nHEADER %s ",$2);
        printf("\nID %s ",$3);
    }
    else
    {
        printf("Block %s is not able to find its END\n" , $3);
    }

  }
  /* BLOCK whose ENDS carries no trailing name: always reported as unmatched. */
  | TOK_BEGIN TOK_BLOCK TOK_STRING blockcontents TOK_END  { printf("Block %s is not able to find its END\n" , $3); }

  /* BEGIN followed directly by a name (no type keyword): accepted, prints nothing. */
  | TOK_BEGIN TOK_STRING blockcontents TOK_END TOK_STRING {}
  | TOK_BEGIN TOK_STRUCT TOK_STRING blockcontents TOK_END TOK_STRING
  {
    if (strcmp($3,$6) == 0 )
    {
        printf("\nHEADER %s ",$2);
        printf("\nID %s \n",$3);
    }
    else
    {
        printf("Block %s is not able to find its END\n" , $3);
    }

  }
  | TOK_BEGIN TOK_STRAP TOK_STRING blockcontents TOK_END TOK_STRING
  {
    if (strcmp($3,$6) == 0 )
    {
        printf("\nHEADER %s ",$2);
        printf("\nID %s \n",$3);
    }
    else
    {
        printf("Block %s is not able to find its END\n" , $3);
    }
  }
  /* WRAP block: no action is attached, so nothing is printed and the names
     after BEGIN and ENDS are not compared. */
  | TOK_BEGIN TOK_WRAP TOK_STRING blockcontents TOK_END TOK_STRING
;


blockcontents : item
      | blockcontents item
      ;


/* item: one "KEYWORD value" pair inside a block, or a nested block.
 * Every keyword alternative takes exactly ONE TOK_STRING as its value, so a
 * value written as several whitespace-separated tokens (for example the
 * CONDITION value "UNI_PLACE_GLOBAL && compBits") is only matched up to its
 * first token; the next token must then start a new item or be ENDS.
 * The actions echo "keyword->value" to cout; TOK_VERSION is accepted silently.
 */
item    : TOK_NAME TOK_STRING        { cout << endl << $1 << "->" << $2 << "  "; }
| TOK_SIZE TOK_STRING        { cout << $1 << "->" << $2 << "  "; }
| TOK_ITERATE TOK_STRING     { cout << $1 << "->" << $2 << "  ";  }
| TOK_DIRECTION TOK_STRING   { cout << endl << $1 << "->" << $2 << "  " << endl; }
| TOK_STRAP TOK_STRING       { cout  << $1 << "->" << $2 << "  "; }
| TOK_WRAP TOK_STRING        { cout << $1 << "->" << $2 << "  "; }
| TOK_VIA TOK_STRING         { cout << $1 << "->" << $2 << "  " << endl; }
| TOK_VERSION TOK_STRING     {}
| TOK_UNIQUE TOK_STRING      { cout << endl << $1 << "->" << $2 << "  " << endl; }
| TOK_REF TOK_STRING         { cout << endl << $1 << "->" << $2 << "  " << endl; }
| TOK_POS TOK_STRING         { cout << endl << $1 << "->" << $2 << "  " << endl; }
| TOK_CON TOK_STRING         { cout << endl << $1 << "->" << $2 << "  " << endl; }
| TOK_ORI TOK_STRING         { cout << endl << $1 << "->" << $2 << "  " << endl; }  
| block
;




%%



/*
 * Entry point: opens the fixed input file "LG.txt", hands it to the scanner
 * through yyin and runs the parser over it.
 *
 * Returns -1 when the file cannot be opened and 0 once parsing has finished.
 * A syntax error does not return here: yyerror() terminates the process.
 */
int main(void) {
    FILE *pt = fopen("LG.txt", "r");
    if (!pt) {
        cout << "Bad Input.Noexistant file" << endl;
        return -1;
    }

    yyin = pt;
    do {
        // yydebug = 1;  // enable to trace the parser's actions
        yyparse();
    } while (!feof(yyin));

    // The original never released the input file; close it before exiting.
    fclose(pt);
    return 0;
}
void yyerror(const char *s) {
    cout << "parse error on line " << line_num << "!  Message: " << s << endl;
    exit(-1);
}

/*
 * End-of-input hook for the scanner. Always returns 1 (true), i.e. there is
 * no further input file to continue with.
 */
extern "C" int yywrap()
{
        const int no_more_input = 1;
        return no_more_input;
}



#include "lex.yy.c"

Solution

  • What you need to do is enable the debug mode of the lexer using the -d option flag, and also enable the parser trace mode using the -t flag. You also have to add a couple of lines to the main program to enable the parser trace:

    Add these lines to ayacc.y after the int main(void) {:

        extern int yydebug;
        yydebug = 1;
    

    Now build with debug and trace:

    flex -d lex.l
    bison -t -d ayacc.y
    

    Now when you run you get this:

    Starting parse
    Entering state 0
    Reading a token: --(end of buffer or a NUL)
    --accepting rule at line 80 ("BEGIN")
    Next token is token TOK_BEGIN ()
    Shifting token TOK_BEGIN ()
    Entering state 1
    Reading a token: --accepting rule at line 52 (" ")
    --accepting rule at line 71 ("WRAP")
    Next token is token TOK_WRAP ()
    Shifting token TOK_WRAP ()
    Entering state 7
    Reading a token: --accepting rule at line 52 (" ")
    --accepting rule at line 109 ("WIO3")
    Next token is token TOK_STRING ()
    Shifting token TOK_STRING ()
    Entering state 29
    Reading a token: --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 119 ("
    ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 56 ("NAME")
    Next token is token TOK_NAME ()
    Shifting token TOK_NAME ()
    Entering state 12
    Reading a token: --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 109 ("lgCell_prg160_io")
    Next token is token TOK_STRING ()
    Shifting token TOK_STRING ()
    Entering state 32
    Reducing stack by rule 12 (line 109):
       $1 = token TOK_NAME ()
       $2 = token TOK_STRING ()
    
    -> $$ = nterm item ()
    Stack now 0 1 7 29
    Entering state 27
    Reducing stack by rule 10 (line 104):
       $1 = nterm item ()
    -> $$ = nterm blockcontents ()
    Stack now 0 1 7 29
    Entering state 48
    Reading a token: --accepting rule at line 119 ("
    ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 103 ("CONDITION")
    Next token is token TOK_CON ()
    Shifting token TOK_CON ()
    Entering state 23
    Reading a token: --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 52 (" ")
    --accepting rule at line 109 ("UNI_PLACE_GLOBAL")
    Next token is token TOK_STRING ()
    Shifting token TOK_STRING ()
    Entering state 43
    Reducing stack by rule 23 (line 120):
       $1 = token TOK_CON ()
       $2 = token TOK_STRING ()
    NAME->lgCell_prg160_io  
    CONDITION->UNI_PLACE_GLOBAL  
    -> $$ = nterm item ()
    Stack now 0 1 7 29 48
    Entering state 46
    Reducing stack by rule 11 (line 105):
       $1 = nterm blockcontents ()
       $2 = nterm item ()
    -> $$ = nterm blockcontents ()
    Stack now 0 1 7 29
    Entering state 48
    Reading a token: --accepting rule at line 52 (" ")
    --accepting rule at line 109 ("&&")
    Next token is token TOK_STRING ()
    parse error on line 3!  Message: syntax error
    

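    You can see that the spaces are being ignored correctly by the lexer rule. Your problem is that the && symbol is being recognised as a TOK_STRING (the QTEXT character class includes the & character), which is not what the grammar rules expect: once CONDITION UNI_PLACE_GLOBAL has been reduced to an item, the parser can only accept a keyword that starts another item (or BEGIN for a nested block) or ENDS.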

    Not knowing what the correct grammar for your language should be (you did not say) I cannot fix this for you without more information. You might want to fix the TOK_STRING rule to only match strings!