Search code examples
gcccompiler-errorsgnuyacclex

Bison parser always prints "syntax error"


I am trying to build a 3 address code generator which would produce:

input:x=a+3*(b/7)


output: t1=b/7
t2=3*t1
t3=a+t2
x=t3

NO matter whatever i give as input the output is "syntax error".

I'm using Windows 10.

Yacc code:

   %{
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define YYDEBUG 1

int yylex(void);
int t_count = 1;

void yyerror(char *s)
{
    fprintf(stderr,"%s\n",s);
    return;
}

char * generateToken(int i)
{

char* ch=(char*)malloc(sizeof(char)*5);

sprintf(ch,"t%d",i++);

return ch;
}

%}

%union { double dval; char ivar[50]; }
%token <ivar> NUMBER
%token  <ivar> NAME

%type <ivar> expr
%type <ivar> term
%left '+' '-'
%left '*' '/'
%left '(' ')'
%right '='
%%
program:
    line            {
                            }
    | program line                  {
                            }
    ;
line:
    expr    '\n'            {   
                                t_count =1; 
                            }
    | NAME '=' expr '\n'    {   

                                printf("%s = %s", $3,$1);
                                t_count=1;
                            }
    ;
expr:
    expr '+' expr           { 


                                strcpy($$,generateToken(t_count));

                                printf("%s = %s + %s",$$,$1,$3);

                            }
    | expr '-' expr         { 


                                    strcpy($$,generateToken(t_count));
                                printf("%s = %s - %s",$$,$1,$3);


                            }

    | expr '*' expr         { 

                                    strcpy($$,generateToken(t_count));
                                printf("%s = %s * %s",$$,$1,$3);

                            }
    | expr '/' expr         { 

                                    strcpy($$,generateToken(t_count));
                                printf("%s = %s / %s",$$,$1,$3);

                            }
    | term                  {               
                                strcpy($$, $1);
                            }
    | '(' expr ')'      {

                                strcpy($$,generateToken(t_count));
                                printf("%s =( %s )" ,$$,$2);

                            }
    ;
term:

    NAME                    { 

                                strcpy($$, $1);
                            }
    | NUMBER                {   
                                strcpy($$, $1);

                            }
    ;
%%

int main(void)
{
    if (getenv("YYDEBUG")) yydebug = 1;


    yyparse();

    return 0;
}

Lex code:

%option noyywrap
               %{
               #include <stdlib.h>
               #include <stdio.h>
               #include <string.h>
               #include "threeAdd.tab.h"

               void yyerror(char*);
               extern YYSTYPE yylval;
               %}
               NAME [a-zA-Z]
               DIGIT  [0-9]+
               NUMBER [-]?{DIGIT}+(\.{DIGIT}+)?

               %%
               [ \t]+    { }
               {NUMBER}
                    {


                    strcpy(yylval.ivar,yytext);
                    return *yylval.ivar;
                     }
               "+"  {
                        return *yytext;
                     }
               "-" {
                        return *yytext;
                     }
               "*"  {
                        return *yytext;
                     }
               "/"  {
                        return *yytext;
                     }
               "="   {
                        return *yytext;
                     }       
               "("   {
                        return *yytext;
                     }
               ")"   {
                        return *yytext;
                     }
               {NAME}    {


                        strcpy(yylval.ivar,yytext);
                        return *yylval.ivar;
                    }
               "\n"          {
                        return *yytext;
                     }
               exit     {
                        return 0;
                     }

               .        {
                        char msg[25];
                        sprintf(msg," <%s>","invalid character",yytext);
                        yyerror(msg);
                     }


               %%

Sample build & run:

  • C:\Users\USER\OneDrive\Desktop\Compiler\ICG>flex file.l
    C:\Users\USER\OneDrive\Desktop\Compiler\ICG>bison -d file.y
    C:\Users\USER\OneDrive\Desktop\Compiler\ICG>gcc lex.yy.c file.tab.c -o ICG.exe C:\Users\USER\OneDrive\Desktop\Compiler\ICG>ICG.exe 3+9

       syntax error
    

Solution

  • The basic problem is that you are use double-quote (" -- strings) for tokens in your yacc file (without defining any codes for them, so they're useless), and returning single character tokens in your lex file. As a result, none of the tokens will be recognized in your parser.

    Replace all the " characters with ' characters on all the single character tokens in your yacc file (so "+" becomes '+' and "\n" becomes '\n').

    Once you fix that, you have another problem: your lex rules for {DIGITS}+ and {NAME} don't return a token, so the token will be ignored (leading to syntax errors)

    For debugging parser problems in general, it is often worth compiling with -DYYDEBUG and sticking yydebug = 1; into main before calling yyparse, which will cause the parser to print a trace of tokens seen and states visited. I often put

    if (getenv("YYDEBUG")) yydebug = 1;
    

    into main and just leave it there -- that way normally debugging won't be enabled, but if you set the environment variable YYDEBUG=1 before running your program, you'll see the debug trace (no need to recompile)


    In order to return a token, your lexer rule needs to return the token. So your lexer rule for NUMBER should be:

    {NUMBER} {
                strcpy(yylval.ivar,yytext);
                return NUMBER;
             }
    

    and similar for NAME. Note that the opening { of the code block must be on the same line as the pattern -- if it is on a separate line it will not be associated with the pattern.