I was expecting shift/reduce conlict for if else, but It gave reduce/reduce conflict on line "| IF '(' boolean_statement ')' block".
Here's some information which might help explain the following code:
BOOL
is token for keyword used at start of each line to indicate line is boolean operation
BOOLEAN
is either "true" or "false" value
I am using this compiler to convert a language to C code, the language can contain statements like a,b,c=d+2
which is equivalent to a=b=c=d+2
in C; and bool e = f * .N. g + h
, which is equivalent to e = f && !g || h
.
statements:
statements statement
| statement
;
statement:
if_statement
| BOOL variable_list '=' boolean_statement
| variable_list '=' integer_statement
;
if_statement:
IF '(' boolean_statement ')' block ELSE block
| IF '(' boolean_statement ')' block
;
variable_list:
variable_list ',' variable
| variable
;
variable:
STRING
| STRING '[' INTEGER ']'
| STRING '[' STRING ']'
;
boolean_statement:
'(' boolean_statement ')'
| bval '*' boolean_statement
| bval '+' boolean_statement
| bval EQ boolean_statement
| bval NEQ boolean_statement
| NOT boolean_statement
| bval
;
bval:
BOOLEAN
| variable
;
integer_statement:
'(' integer_statement ')'
| value '+' integer_statement
| value '*' integer_statement
| value
;
value:
INTEGER
| variable
;
block:
statement
| '{' statements '}'
;
Here is the full code
%{
#include <cstdio>
#include <iostream>
using namespace std;
//stuff from flex that bison needs to know about:
extern "C" int yylex();
extern "C" int yyparse();
extern "C" FILE *yyin;
extern int line_num;
void yyerror(const char *s);
%}
//C union holding each of the types of tokens that Flex could return
%union {
int ival;
bool bval;
char const *sval;
}
//symbol defination
%token <sval> STRING;
%token <sval> NOT
%token CONSTANT_SECTION
%token BOOLEAN_SECTION
%token INTEGER_SECTION
%token LOGIC_SECTION
%token TIMER_SECTION
%token <sval> BOOLEAN
%token <ival> INTEGER
%token <ival> HEX
%token ENDL
%token BOOL
%token IF
%token ELSE
%token EQ NEQ
%token AND
%token OR
%token SUBROUTINE_END
%token SUBROUTINE_START
%token DELAY SECONDS HOURS MINUTES MSEC
%token GOTO
%token LABEL
%token CALL
//end of declaration section
%%
logic:
costants_declarations boolean_declarations integer_declarations timer_declarations logic_statements
| boolean_declarations integer_declarations timer_declarations logic_statements
| logic_statements
;
costants_declarations:
CONSTANT_SECTION constants
;
constants:
constants STRING '=' INTEGER { cout << "const int " << $2 << " = " << $4 << ";" << endl; }
| constants STRING '=' HEX { cout << "const int " << $2 << " = " << $4 << ";" << endl; }
| STRING '=' INTEGER { cout << "const int " << $1 << " = " << $3 << ";" << endl; }
| STRING '=' HEX { cout << "const int " << $1 << " = " << $3 << ";" << endl; }
;
boolean_declarations:
BOOLEAN_SECTION booleans
;
booleans:
booleans ',' boolean
| booleans boolean
| boolean
;
boolean:
STRING '[' INTEGER ']' { cout << "bool " << $1 << "[" << $3 << "]" << ";" << endl; }
| STRING '[' STRING ']' { cout << "bool " << $1 << "[" << $3 << "]" << ";" << endl; }
| STRING { cout << "bool " << $1 << " = true;" << endl; }
;
integer_declarations:
INTEGER_SECTION integers
;
integers:
integers ',' integer
| integers integer
| integer
;
integer:
STRING '[' INTEGER ']' { cout << "int " << $1 << "[" << $3 << "]" << ";" << endl; }
| STRING '[' STRING ']' { cout << "int " << $1 << "[" << $3 << "]" << ";" << endl; }
| STRING { cout << "int " << $1 << " = 0;" << endl; }
;
timer_declarations:
TIMER_SECTION timers
;
timers:
timers ',' timer
| timers timer
| timer
;
timer:
STRING { cout << "int " << $1 << ";" << endl; }
;
logic_statements:
LOGIC_SECTION subroutines statements
;
subroutines:
/* empty */
| SUBROUTINE_START STRING statements SUBROUTINE_END STRING
;
statements:
statements statement
| statement
;
statement:
if_statement
| delay_statement
| GOTO STRING
| LABEL
| CALL STRING
| BOOL variable_list '=' { cout << " = "; } boolean_statement { cout << ";\n"; }
| variable_list '=' { cout << " = "; } integer_statement { cout << ";\n"; }
;
if_statement:
IF '(' { cout << "if("; } boolean_statement ')' { cout << ")" << endl; } block
| IF '(' { cout << "if("; } boolean_statement ')' { cout << ")" << endl; } block ELSE block
;
delay_statement:
DELAY '=' INTEGER SECONDS statement
;
variable_list:
variable_list ',' { cout << " = "; } variable
| variable
;
variable:
STRING { cout << $1; }
| STRING '[' INTEGER ']' { cout << $1 << "[" << $3 << "]"; }
| STRING '[' STRING ']' { cout << $1 << "[" << $3 << "]"; }
;
boolean_statement:
'('{ cout << "("; } boolean_statement ')'{ cout << ")"; }
| bval '+' { cout << " || "; } boolean_statement
| bval OR { cout << " || "; } boolean_statement
| bval '*' { cout << " && "; } boolean_statement
| bval AND { cout << " && "; } boolean_statement
| bval EQ { cout << " == "; } boolean_statement
| bval NEQ { cout << " != "; } boolean_statement
| NOT { cout << $1; } boolean_statement
| bval
;
bval:
BOOLEAN { cout << $1; }
| variable
;
integer_statement:
'('{ cout << "("; } integer_statement ')'{ cout << ")"; }
| value '+'{ cout << " + "; } integer_statement
| value '*'{ cout << " * "; } integer_statement
| value
;
value:
INTEGER { cout << $1; }
| variable
;
block:
{ cout << "{" << endl; } statement { cout << "}" << endl; }
| '{' { cout << "{" << endl; } statements '}' { cout << "}" << endl; }
;
//end of grammer section
%%
int main(int argc, char *argv[]) {
// default input is stdin
// if file is given read from it
if(argc == 2)
{
// open a file handle to a particular file:
FILE *myfile = fopen(argv[1], "r");
// make sure it's valid:
if (!myfile) {
cout << "Can't open "<< argv[1] <<" file" << endl;
cout << "Usage: " << argv[0] << " <filename>\n";
return -1;
}
// set lex to read from it instead of defaulting to STDIN:
yyin = myfile;
}
else if(argc != 1)
{
cout << "Usage: " << argv[0] << " <filename>\n";
cout << "Usage: " << argv[0] << endl;
return -1;
}
// parse through the input until there is no more:
do
{
yyparse();
} while (!feof(yyin));
}
void yyerror(const char *s) {
cout << "Parse error on line " << line_num << "! Message: " << s << endl;
// might as well halt now:
exit(-1);
}
The problem is not with the IF
statement exactly. It's with the mid-rule actions (MRAs) in the two productions for if_statement
:
if_statement:
IF '(' { cout << "if("; } boolean_statement ')' { cout << ")" << endl; } block
| IF '(' { cout << "if("; } boolean_statement ')' { cout << ")" << endl; } block ELSE block
;
A mid-rule action, like { cout << "if("; }
, is translated to a uniquely-named empty non-terminal. In effect, the above productions are turned into the following:
if_statement:
IF '(' @3 boolean_statement ')' @4 block
| IF '(' @5 boolean_statement ')' @6 block ELSE block
;
@3: %empty { cout << "if("; } ;
@4: %empty { cout << ")" << endl; } ;
@5: %empty { cout << "if("; } ;
@6: %empty { cout << ")" << endl; } ;
In the above, @3
and @5
are identical (as are @4
and @6
), but bison doesn't check for that; every MRA is considered unique. And that leads to the reduce/reduce conflict, because once the parser has read if (, it will need to reduce one of @3
or @5
before it can shift the following token, regardless what that token might be, but the next token gives no clue as to whether or not an else will eventually appear. (Both productions continue with boolean_statement
, so the following token can be any token in FIRST(boolean_statement)
, in either case.)
The fact that the conflict is resolved in favour of @3
(the textually earlier non-terminal) means that @5
can never be reduced, and bison provides a warning about that. (At least, my version of bison did.)
This is a classic problem with MRAs, sufficiently common that it warrants a section in the bison manual.
In this case, you can fix the problem simply by left-factoring:
if_statement:
if_then
| if_then ELSE block
;
if_then:
IF '(' { cout << "if("; }
boolean_statement ')' { cout << ")" << endl; }
block
;