I'm making a Visual Basic parser with Flex and Bison for a uni assignment. Most of it seems to be working properly except for the parsing of multiline expressions. Here is an example of a bit of code that doesn't work:
A = A +
1
Interestingly, removing `A =` makes it parse properly. The same happens if I replace `=` with an operator whose precedence is higher than or equal to that of `+`.
The parser seems to prioritize single line expressions over multiline ones. I understand that this is a precedence issue, but I have no idea how to solve this.
Truncated Flex code:
%option nounistd
%option noyywrap
%option case-insensitive
%{
/*
 * Scanner for the expression parser: single-character operators, decimal
 * integer literals, identifiers and line breaks. Token codes and yylval
 * come from the Bison-generated header included below.
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parcer-mini.tab.h"
#pragma warning(disable : 4996)
#define YY_DECL int yylex()
int result = 0;
%}
%x STRING_LITERAL
%%
%{
/*
 * Code in this position is copied to the top of yylex(), so the buffer is
 * local to the scanner routine and is cleared on every call.
 * NOTE(review): buf and the STRING_LITERAL start condition are not used by
 * the rules shown here -- presumably by rules omitted from this listing.
 */
char buf[100000];
memset(buf, '\0', sizeof buf);
%}
"+" { return '+'; }
"-" { return '-'; }
"*" { return '*'; }
"/" { return '/'; }
"=" { return '='; }
"<" { return '<'; }
">" { return '>'; }
"^" { return '^'; }
[0-9]+ {
    /* strtol() instead of atoi(): atoi() has undefined behaviour when the
       literal does not fit, strtol() lets us reject it explicitly. */
    long value;
    errno = 0;
    value = strtol(yytext, NULL, 10);
    if (errno == ERANGE || value > INT_MAX) {
        fprintf(stderr, "Parse error: integer literal out of range: %s\n", yytext);
        exit(1);
    }
    yylval.int_val = (int) value;
    return INT_VALUE;
}
[a-zA-Z_][a-zA-Z_0-9]* {
    /* Hand the parser its own heap copy of the lexeme; yytext is reused by
       the scanner for the next match. Nothing in this listing frees it. */
    size_t size = strlen(yytext) + 1;
    yylval.id_var_name = (char *) malloc(size);
    if (yylval.id_var_name == NULL) {
        fprintf(stderr, "Parse error: out of memory\n");
        exit(1);
    }
    memcpy(yylval.id_var_name, yytext, size);
    return IDENTIFIER;
}
\n+ { /* a run of newlines is reported as one token */ return END_OF_LINE; }
<<EOF>> {
    /* First EOF yields one synthetic END_OF_LINE, every later call yields 0
       (end of input) -- presumably so a final line without a trailing
       newline still terminates its statement. */
    static int once = 0;
    return once++ ? 0 : END_OF_LINE;
}
%%
Truncated Bison code:
/* Ask Bison for descriptive syntax-error messages. */
%define parse.error verbose
%{
#pragma warning(disable : 4996)
#include <stdio.h>
#include <stdlib.h>
/* Declarations for names defined outside this prologue (scanner globals,
   the generated yyparse(), and yyerror() from the epilogue). */
extern int yylineno;
extern FILE* yyin;
extern int yyparse();
extern int yylex();
void yyerror(const char* s);
%}
/* Semantic value types carried by tokens. */
%union {
int int_val;
char* id_var_name;
}
/* NOTE(review): the <expression> and <statement> tags are not members of the
   %union shown above -- presumably they were removed when the listing was
   truncated; confirm against the full file. */
%type <expression> expr_singleline expr_multiline basic_literal_value;
%type <statement> stmt root;
%type stmt_ends;
%token<int_val> INT_VALUE
%token<id_var_name> IDENTIFIER
%token END_OF_LINE
/* Operator precedence, lowest first: each later line binds tighter than the
   lines before it. */
%left '='
%left '>' '<'
%left '+' '-'
%left '*' '/'
/* Pseudo-tokens referenced only through %prec on the unary rules. */
%right UNARY_MINUS UNARY_PLUS
%left '^'
/* Gives IDENTIFIER the highest precedence level, without associativity. */
%precedence IDENTIFIER
%start root
%%
/* Start symbol: the input is a single statement. Every action in this
   grammar only prints a trace of the rule that was reduced. */
root: stmt {printf("root 1\n");}
;
/* A statement is one expression followed by at least one END_OF_LINE.
   This is the only place expr_multiline is referenced. */
stmt: expr_multiline stmt_ends {printf("stmt 1\n");}
| expr_singleline stmt_ends {printf("stmt 2\n");}
;
/* One or more END_OF_LINE tokens. */
stmt_ends: END_OF_LINE {printf("stmt_ends 1\n");}
| stmt_ends END_OF_LINE {printf("stmt_ends 2\n");}
;
/* Expression without line breaks. Ambiguity between the binary rules is
   resolved by the %left/%right declarations; the unary rules borrow the
   UNARY_MINUS / UNARY_PLUS precedence through %prec. */
expr_singleline: basic_literal_value {printf("expr_single 0\n");}
| '-' expr_singleline %prec UNARY_MINUS {printf("expr_single 1\n");}
| '+' expr_singleline %prec UNARY_PLUS {printf("expr_single 2\n");}
| expr_singleline '+' expr_singleline {printf("expr_single 3\n");}
| expr_singleline '-' expr_singleline {printf("expr_single 4\n");}
| expr_singleline '*' expr_singleline {printf("expr_single 5\n");}
| expr_singleline '/' expr_singleline {printf("expr_single 6\n");}
| expr_singleline '=' expr_singleline {printf("expr_single 8\n");}
| expr_singleline '<' expr_singleline {printf("expr_single 9\n");}
| expr_singleline '>' expr_singleline {printf("expr_single 10\n");}
| expr_singleline '^' expr_singleline {printf("expr_single 11\n");}
| IDENTIFIER {printf("expr_single 17\n");}
;
/* Binary expression with a line break directly after the operator.
   Both operands are expr_singleline and expr_singleline never refers back to
   expr_multiline, so the break is accepted only after the operator that is
   still pending at statement level. For `A = A +` followed by a newline, '+'
   binds tighter than '=' and is therefore shifted while the parser is inside
   the right operand of '='; there only `expr_singleline '+' expr_singleline`
   applies, so the END_OF_LINE is a syntax error. With a left operator of
   equal or higher precedence the left side is reduced first and the '+' is
   seen at statement level, which is why those inputs parse. */
expr_multiline: expr_singleline '+' END_OF_LINE expr_singleline {printf("expr_multi 1\n");}
| expr_singleline '-' END_OF_LINE expr_singleline {printf("expr_multi 2\n");}
| expr_singleline '*' END_OF_LINE expr_singleline {printf("expr_multi 3\n");}
| expr_singleline '/' END_OF_LINE expr_singleline {printf("expr_multi 4\n");}
| expr_singleline '=' END_OF_LINE expr_singleline {printf("expr_multi 6\n");}
| expr_singleline '<' END_OF_LINE expr_singleline {printf("expr_multi 7\n");}
| expr_singleline '>' END_OF_LINE expr_singleline {printf("expr_multi 8\n");}
| expr_singleline '^' END_OF_LINE expr_singleline {printf("expr_multi 9\n");}
;
/* Literal operands; only integer literals appear in this listing. */
basic_literal_value: INT_VALUE {printf("basic_literal_value int\n");}
;
%%
/*
 * Entry point: parses the file named by the first command-line argument.
 *
 * Returns EXIT_SUCCESS when yyparse() returns 0, and EXIT_FAILURE when the
 * file cannot be opened or yyparse() returns non-zero. When no argument is
 * given the problem is reported through yyerror(), which ends the process.
 */
int main(int argc, char** argv) {
    int status;

    if (argc < 2) {
        yyerror("not found file");
        return EXIT_FAILURE; /* not reached: yyerror() calls exit() */
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        /* Stop here: a Flex scanner handed a NULL yyin reads stdin instead,
           which would hide the bad path from the user. */
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    status = yyparse();
    fclose(yyin);
    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
/*
 * Error callback used by the generated parser (and by main): writes the
 * message to stderr with a "Parse error: " prefix, then terminates the
 * process with exit status 1.
 */
void yyerror(const char* s)
{
    fprintf(stderr, "Parse error: %s\n", s);
    exit(1);
}
I couldn't figure out how to solve the aforementioned issue, but I found a workaround. By creating an optional End of Line token, I merged expr_singleline and expr_multiline. This method works perfectly.
Modified truncated Bison code:
/* Ask Bison for descriptive syntax-error messages. */
%define parse.error verbose
%{
#pragma warning(disable : 4996)
#include <stdio.h>
#include <stdlib.h>
/* Declarations for names defined outside this prologue (scanner globals,
   the generated yyparse(), and yyerror() from the epilogue). */
extern int yylineno;
extern FILE* yyin;
extern int yyparse();
extern int yylex();
void yyerror(const char* s);
%}
/* Semantic value types carried by tokens. */
%union {
int int_val;
char* id_var_name;
}
/* NOTE(review): the <expression> and <statement> tags are not members of the
   %union shown above -- presumably they were removed when the listing was
   truncated; confirm against the full file. */
%type <expression> expr basic_literal_value;
%type <statement> stmt root;
%type stmt_ends optEoL;
%token<int_val> INT_VALUE
%token<id_var_name> IDENTIFIER
%token END_OF_LINE
/* Operator precedence, lowest first: each later line binds tighter than the
   lines before it. */
%left '='
%left '>' '<'
%left '+' '-'
%left '*' '/'
/* Pseudo-tokens referenced only through %prec on the unary rules. */
%right UNARY_MINUS UNARY_PLUS
%left '^'
/* Gives IDENTIFIER the highest precedence level, without associativity. */
%precedence IDENTIFIER
%start root
%%
/* Start symbol: the input is a single statement. Every action in this
   grammar only prints a trace of the rule that was reduced. */
root: stmt {printf("root 1\n");}
    ;

/* A statement is one expression followed by at least one END_OF_LINE. */
stmt: expr stmt_ends {printf("stmt 2\n");}
    ;

/* One or more END_OF_LINE tokens. */
stmt_ends: END_OF_LINE {printf("stmt_ends 1\n");}
    | stmt_ends END_OF_LINE {printf("stmt_ends 2\n");}
    ;

/* Optional line break, allowed directly after a binary operator. */
optEoL: %empty
    | END_OF_LINE {printf("optEoL 1\n");}
    ;

/* Expression, possibly continued on the next line after a binary operator.
   The right operand is `expr` itself: the former expr_singleline symbol has
   no rules in this grammar, so referring to it makes Bison reject the file.
   optEoL is a nonterminal, so each binary rule still takes its precedence
   from its operator token (the last terminal in the rule), and the
   %left/%right declarations keep resolving the ambiguity as before. */
expr: basic_literal_value {printf("expr_single 0\n");}
    | '-' expr %prec UNARY_MINUS {printf("expr_single 1\n");}
    | '+' expr %prec UNARY_PLUS {printf("expr_single 2\n");}
    | expr '+' optEoL expr {printf("expr_single 3\n");}
    | expr '-' optEoL expr {printf("expr_single 4\n");}
    | expr '*' optEoL expr {printf("expr_single 5\n");}
    | expr '/' optEoL expr {printf("expr_single 6\n");}
    | expr '=' optEoL expr {printf("expr_single 8\n");}
    | expr '<' optEoL expr {printf("expr_single 9\n");}
    | expr '>' optEoL expr {printf("expr_single 10\n");}
    | expr '^' optEoL expr {printf("expr_single 11\n");}
    | IDENTIFIER {printf("expr_single 17\n");}
    ;

/* Literal operands; only integer literals appear in this listing. */
basic_literal_value: INT_VALUE {printf("basic_literal_value int\n");}
    ;
%%
/*
 * Entry point: parses the file named by the first command-line argument.
 *
 * Returns EXIT_SUCCESS when yyparse() returns 0, and EXIT_FAILURE when the
 * file cannot be opened or yyparse() returns non-zero. When no argument is
 * given the problem is reported through yyerror(), which ends the process.
 */
int main(int argc, char** argv) {
    int status;

    if (argc < 2) {
        yyerror("not found file");
        return EXIT_FAILURE; /* not reached: yyerror() calls exit() */
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        /* Stop here: a Flex scanner handed a NULL yyin reads stdin instead,
           which would hide the bad path from the user. */
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    status = yyparse();
    fclose(yyin);
    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
/*
 * Error callback used by the generated parser (and by main): writes the
 * message to stderr with a "Parse error: " prefix, then terminates the
 * process with exit status 1.
 */
void yyerror(const char* s)
{
    fprintf(stderr, "Parse error: %s\n", s);
    exit(1);
}