Search code examples
parsingflex-lexeryacc

FLEX/YACC program not behaving as expected : can't grab int value from sequence of ints


I am trying to build a parser that takes a list of strings in the following format and performs either an addition or multiplication of all of its elements :

prod 5-6_
sum _
sum 5_
sum 5-6-7_
$

Should print the following to the screen :

prod = 30
sum = 0
sum = 5
sum = 18

What I am actually getting as output is this :

prod = 0
sum = 0
sum = 5
sum = 5

My lex file looks like this :

%{
        #include <iostream>
        #include "y.tab.h"
        using namespace std;
        extern "C" int yylex();
%}

%option yylineno

digit    [0-9]
integer  {digit}+
operator "sum"|"prod"

%%

{integer}   { return number; }
{operator}  { return oper; }
"-"         { return '-'; }
"_"         { return '_'; }
"$"         { return '$'; }
\n          { ; }
[\t ]+      { ; }
.           { cout << "unknown char" << endl; }

%%

and my yacc file looks like this :

%token oper
%token number
%token '-'
%token '_'
%token '$'

%start valid


%{
        #include <iostream>
        #include <string>
        #include <cstdio>
        #include <cstdlib>
        using namespace std;

        #define YYSTYPE int 

        extern FILE *yyin;
        extern char yytext[];
        extern "C" int yylex();
        int yyparse();
        extern int yyerror(char *);

        char op;
%}

%%

valid           : expr_seq endfile      {}
                | {}
                ;

expr_seq        : expr                  {}
                | expr_seq expr         {}
                ;

expr            : op sequence nl        {if (op == '+') cout << "sum = " ; else cout << "prod =  ";}
                | op nl                 {if (op == '+') cout << "sum = 0"; else  cout <<"prod = 1";}                    
                ;

op              : oper                      { if (yytext[0] == 's') op = '+'; else op = '*';}
                ;

sequence        : number                    { $$ = atoi(yytext);}
                | sequence '-' number        { if (op == '+') $$ = $1 + $3; else $$ = $1 * $3;}
                ;

nl              : '_'                   { cout << endl;}
                ;

endfile         : '$'                   {}
                ;

%%

int main(int argc, char *argv[])
{
        ++argv, --argc;
        if(argc > 0)    yyin = fopen(argv[0], "r");
        else            yyin = stdin;

        yyparse(); 

        return 0;
} 

int yyerror(char * msg)
{
        extern int yylineno;
        cerr << msg << "on line # " << yylineno  << endl;

        return 0;
} 

My reasoning for the yacc logic is as follows :

  • a file is valid only if it contains a sequence of expressions followed by the endfile symbol.
  • a sequence of expressions is a single expression or several expressions.
  • an expression is either an operator followed by a new line, OR an operator, followed by a list of numbers, followed by a new line symbol.
  • an operator is either 'sum' or 'prod'
  • a list of numbers is either a number or several numbers separated by the '-' symbol.

From my perspective this should work, but for some reason it doesn't interpret the sequence of numbers properly after the first element. Any tips would be helpful.

Thanks


Solution

  • The solution that worked was simply to change the following lines :

    sequence        : number                    { $$ = atoi(yytext);}
                    | sequence '-' number        { if (op == '+') $$ = $1 + $3; else $$ = $1 * $3;}
                    ;
    

    to this :

     sequence        : number                    { $$ = atoi(yytext);}
                        | sequence '-' number        { if (op == '+') $$ = $1 + atoi(yytext); else $$ = $1 * atoi(yytext);}
                        ;