Search code examples
bisonflex-lexeryacclexambiguous-grammar

Syntax Error in Lex and Yacc caused by scanner or parser


I am pretty new in Lex and Yacc. I try to learn about grammar rules and semantic actions. I was trying to write a parser that basically does assignments, function declarations, function calls and print statements. The problem is that after I give an input I get output as syntax error. So I'm thinking that my grammar causes this but I'm not sure. Here are my files:

scanner.flx:

%option noyywrap
%option yylineno

%{

#include "parser.tab.h"

%}

IDENT [a-zA-Z_][a-zA-Z0-9_]*
INT -?[0-9]+
STRING "[\S\s]*?"
UNFSTRING "[\S\s]*?[^"]$

%%
"int" return tINT;
"string" return tSTRING;
"return" return tRETURN;
"print" return tPRINT;
"(" return tLPAR;
")" return tRPAR;
"," return tCOMMA;
"%" return tMOD;
"=" return tASSIGNM;
"-" return tMINUS;
"+" return tPLUS;
"/" return tDIV;
"*" return tSTAR;
";" return tSEMI;
"{" return tLBRAC;
"}" return tRBRAC;
{IDENT} return tIDENT;
{INT} return tINTVAL;
{STRING} return tSTRINGVAL;
{UNFSTRING} return tUNFSTRING;
[ \t\n]+
. { /* pass any other character to the parser */
  return yytext[0];
}
%%

parser.y:

%{
#include <stdio.h>

void yyerror (const char *s)
{
    printf ("%s\n", s);
}
%}

%token tINT tSTRING tRETURN tPRINT tLPAR tRPAR tCOMMA tMOD tASSIGNM tMINUS tPLUS tDIV tSTAR tSEMI tLBRAC tRBRAC tIDENT tINTVAL tSTRINGVAL tUNFSTRING

%left '='
%left '+' '-'
%left '*' '/'
%left '(' ')'


%%

CVD19       :              stmtlst
;

stmtlst     :              stmtlst stmt
            |              stmt
            ;

stmt        :              funcDecl
            |              varDecl
            |              assgnmt
            |              callfunc
            |              printstmt
            ;

funcDecl    :              type tIDENT '(' ')' '{' funcbody return '}'              {  printf("FUNCTION ");  }
            |              type tIDENT '(' funcparams ')' '{' funcbody return '}'   {  printf("FUNCTION W/PARAMS ");  }
            ;

funcbody    :              varDecl
            |              assgnmt
            |              callfunc
            |              printstmt
            ;

return      :              tRETURN expr ';'
            ;

funcparams  :              funcparams ',' type tIDENT
            |              type tIDENT
            ;

varDecl     :              type vars '=' expr ';'
            ;

type        :              tINT              {  printf("INT TYPE ");  }
            |              tSTRING           {  printf("STRING TYPE ");  }
            ;

assgnmt     :              tIDENT '=' expr ';'           {  printf("ASSIGNMENT");  }
            ;

callfunc    :              tIDENT '(' ')' ';'            {  printf("FUNCTION CALL");  }
            |              tIDENT '(' vars ')' ';'       {  printf("FUNCTION W/PARAMs CALL");  }
            ;

printstmt   :              tPRINT '(' expr ')' ';'          {  printf("PRINTSTMT 1");  }
            |              tPRINT '(' callfunc ')' ';'      {  printf("PRINTSTMT 2");  }
            ;


vars        :              vars ',' tIDENT
            |              tIDENT            {  printf("IDENT ");  }
            ;

expr        :              value
            |              expr '+' expr     {    $$  =  $1  +  $3;  }
            |              expr '-' expr     {    $$  =  $1  -  $3;  }
            |              expr '*' expr     {    $$  =  $1  *  $3;  }
            |              expr '/' expr     {    $$  =  $1  /  $3;  }
            ;         

value       :              tINTVAL                                {  printf("INTVAL ");  }
            |              tSTRINGVAL                             {  printf("STRINGVAL ");  }
            |              tUNFSTRING                             {  printf("UNFSTRING ");  }
            /*|              tIDENT    MIGHT BE PROBLEMATIC     { $$ = $1; }*/
            ;

%%

int main ()
{
   if (yyparse()) {
   // parse error
       printf("ERROR\n");
       return 1;
   }
   else {
   // successful parsing
      printf("OK\n");
      return 0;
   }
}

While I try to run my files in MacOS Terminal, I use these commands as normal:

flex scanner.flx

-- NO PROBLEMS --

bison -d parser.y

-- NO PROBLEMS --

gcc -o program lex.yy.c parser.tab.c -ll

-- WARNING --

parser.tab.c:1330:16: warning: implicit declaration of function 'yylex' is invalid in C99
      [-Wimplicit-function-declaration]
      yychar = YYLEX;
               ^
parser.tab.c:686:16: note: expanded from macro 'YYLEX'
# define YYLEX yylex ()
               ^
1 warning generated.

Here is around line 1330 in parser.tab.c:

/* First try to decide what to do without reference to look-ahead token.  */
  yyn = yypact[yystate];
  if (yyn == YYPACT_NINF)
    goto yydefault;

  /* Not known => get a look-ahead token if don't already have one.  */

  /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol.  */
  if (yychar == YYEMPTY)
    {
      YYDPRINTF ((stderr, "Reading a token: "));
      yychar = YYLEX;  /* THIS IS LINE 1330 <=============================================
    }

  if (yychar <= YYEOF)
    {
      yychar = yytoken = YYEOF;
      YYDPRINTF ((stderr, "Now at end of input.\n"));
    }
  else
    {
      yytoken = YYTRANSLATE (yychar);
      YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
    }

Here are my inputs:

input1:

int num = 123;

output1:

INT TYPE IDENT syntax error
ERROR

input2:

print("str");

output2:

syntax error
ERROR

input3:

int func(int i) {
 i = 5; 
 return i;
}

output3:

INT TYPE IDENT syntax error
ERROR

Solution

  • Your basic problem is that you are using character literals in your grammar (which is good) but not returning them in your lexer (which is not good).

    Instead of

    "(" return tLPAR;
    ")" return tRPAR;
    // etc.
    

    Just let these characters fall through into your fallback rule:

    .  { return yytext[0]; }
    

    Then you can also get rid of the %token definitions for these single-character tokens since you are using single-character literals.

    You can't do that with longer tokens, unfortunately. So your keyword tokens have to stay the way they are.

    Also, your rule for strings is completely wrong. Please read the documentation for (f)lex regular expressions instead of relying on some other regular expression syntax. Flex does not recognize the \S and \s escapes. It does not implement non-greedy repetition (*?). It does use " as a special syntax (meaning quoted literal strings) -- in fact, you've used that in other rules, so you shouldn't expect " to be a regular character in your STRING format. And $ cannot be used in a macro (and, indeed, there's no good reason to use macros in this scanner definition; I always recommend avoiding them unless there's a good reason for them.)

    One possible string action is:

    ["]([^"]|\\.|\\\n)*["]   { return tSTRINGVAL; }
    

    I strongly recommend that you read the chapter of the bison manual on debugging your grammar, particularly the section on how to enable parser tracing, which is a lot more exact and informative than inserting printf calls into your parser actions. (That would have shown you your problem, in fact.)


    Your problem is not related to the warning produced by the compiler, but you should fix that. It happens because you haven't declared yylex in your bison prolog. Put this just before your definition of yyerror:

    int yylex(void);
    

    So that the compiler knows what the prototype of yylex is. (You have to declare it because bison doesn't do that for you.)