Search code examples
ccompiler-constructionbisonyacclex

parsing compiler grammar & faulty error recovery


i am currently learning how to program in Lex & Bison . I ' ve written the following program but it seems that it has a big problem on its recursiveness and also on error recovery . It always stops after printing the first error . I 've read about yyerrok and yyclearin but i cant get it work. Any help or some links with some tutorials will be appreciated.

cmp.y:

%{
#include <stdio.h>     /* C declarations used in actions */
#include <stdlib.h>
#include <string.h>


char *variables[1000];
char *var_type[1000];

extern int yylineno;
extern FILE* yyin;
//penistern char* yytpenist;
void yyerror(char *s);
void symbols(char *string);
void get_type(char *string);
void check_type(char* string_1,char* string_2, char* opt);
void is_declared(char* string);

%}


%union {char* var; char* type;} 

%token <type> INT  DOUBLE BOOL CHAR
%token FOR WHILE VOID
%token IF ELSE PRINTF  CONTINUE BREAK RETURN
%token STRUCT BYREF
%token NUM 
%token INCLUDE
%token DELETE NEW TRUE FALSE NULLV
%token ADD_ASSIGN SUB_ASSIGN MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN
%token INC_OP DEC_OP AND_OP OR_OP LE_OP GE_OP EQ_OP NE_OP
%token DOT
%token <var> ID 
%right '='
%left AND OR
%left '<' '>' INC_OP DEC_OP AND_OP OR_OP LE_OP GE_OP EQ_OP NE_OP
%error-verbose
%debug



%%


program
     :  declaration 
     |  declaration  program
     ;

declaration
   : declaration_variable ';'
   | declaration_function ';'
   | function 
   | error '\n' {yyerrok;}

   ;



identf
   : ID '[' statheri_expr ']'
   | ID
   ;

declaration_function
   : type ID '(' parameter_list ')' ';'
   | type ID '(' ')' ';' 
   ;

/*tipos_apotelesmatos
   : type
   | VOID
   ;*/

parameter_list
   :  parameter_list ',' parameter
   |  parameter
   ;

parameter
   : BYREF type ID
   | type ID
   ;

function
   : type ID '(' parameter_list ')' '{' stmt '}'
   | type ID '(' parameter_list ')' '{'  '}'
   | type ID '(' ')' '{' '}' 
   | type ID '(' ')' '{' stmt '}'
   | type ID '(' ')' '{' declaration '}' 
   //|error { yyerrok; yyclearin;}

   ;


stmt 
   : FOR '(' expr ';' expr ';'  expr ')' stmt
   | FOR '(' expr ';' expr ';'  ')' stmt
   | FOR '(' expr ';' ')' stmt
   | IF '(' expr ')' 
   //| stmt '{' expr_list '}'
   | '{' expr_list '}'
   | '{' stmt '}'
   | '{' '}'
   | CONTINUE ID ';'
   | CONTINUE ';'
   | BREAK ID ';'
   | BREAK ';'
   | RETURN expr 
   | RETURN ';'
   | expr ';'
   |';'

   ;



expr
   : '(' type ')' expr
   | ID'?' expr ':' expr
   | ID diadikos_telestis_anathesis expr 
   | ID monadiaios_telestis expr
   | ID monadiaios_telestis_anathesis ';'
   | ID diadikos_telestis expr
   | ID '(' expr_list ')'
   | '(' expr ')'
   | '(' expr_list ')'
   | '[' expr ']'
   //| NEW type '[' expr ']' 
   | NEW type 
   | DELETE expr 
   | TRUE
   | FALSE
   | NULLV
   | NUM
   | ID 

   ;

expr_list
   : expr
   | expr_list ',' expr
   ;

statheri_expr
   : expr
   ;



declaration_variable
   :  type identf 
   | declaration_variable ',' identf  
   ;

type
   : basic_type '*'
   | basic_type 
   ;

basic_type
   : INT
   | CHAR 
   | BOOL
   | DOUBLE
   | VOID
   ;




monadiaios_telestis
  : '&'
  | '*'
  | '+'
  | '-'
  | '!'
  ;

monadiaios_telestis_anathesis
    : INC_OP //++
    | DEC_OP //--
    ;

diadikos_telestis_anathesis
    : '='
    | MUL_ASSIGN//'*='
    | DIV_ASSIGN//'/='
    | MOD_ASSIGN //%=
    | ADD_ASSIGN//'+='
    | SUB_ASSIGN//'-='
    ;

diadikos_telestis
    : '/'
    | '%'
    | '<'
    | '>'
    | LE_OP//'<='
    | GE_OP//'>='
    | EQ_OP//'=='
    | NE_OP//'!='
    | AND_OP//'&&'
    | OR_OP//'||'
    ;

%%

cmp.l:

alpha [a-zA-Z]
digit [0-9]




%{
#include "y.tab.h"
#include <stdio.h>

extern void yyerror(const char *);  /* prints grammar violation message */
static void comment(void);

%}

%option  nodefault yylineno




%%

"/*"                         { comment(); }
"//".*                       { /* consume //-comment */ }

[\t\n]+                      {;}
"int"                        {yylval.type = strdup(yytext); return INT;}
"char"                       {yylval.type = strdup(yytext); return CHAR;}
"bool"                       {yylval.type = strdup(yytext); return BOOL;}
"void"                       {yylval.type = strdup(yytext); return VOID;}
"double"                     {yylval.type = strdup(yytext); return DOUBLE;}
"new"                        {return NEW;}
"continue"                   {return CONTINUE;} 
"delete"                     {return DELETE;}
"true"                       {return TRUE;}
"false"                      {return FALSE;}
"null"                       {return NULLV;}
"return"                     {return RETURN;}
"for"                        {return FOR;}
"while"                      {return WHILE;}
"if"                         {return IF;}
"else"                       {return ELSE;}
"printf"                     {return PRINTF;}
"struct"                     {return STRUCT;}
"byref"                      {return BYREF;}
"+="                         {return(ADD_ASSIGN); }
"-="                         {return(SUB_ASSIGN); }
"*="                         {return(MUL_ASSIGN); }
"/="                         {return(DIV_ASSIGN); }
"%="                         {return(MOD_ASSIGN); }
"++"                         {return(INC_OP); }
"--"                         {return(DEC_OP); }
"&&"                         {return(AND_OP); }
"||"                         {return(OR_OP); }
"<="                         {return(LE_OP); }
">="                         {return(GE_OP); }
"=="                         {return(EQ_OP); }
"!="                         {return(NE_OP); }
"&"                          {return('&'); }
"!"                          {return('!'); }
"~"                          {return('~'); }
"-"                          {return('-'); }
"+"                          {return('+'); }
"*"                         {return('*'); }
"/"                         {return('/'); }
"%"                          {return('%'); }
"<"                          {return('<'); }
">"                          {return('>'); }
"^"                          {return('^'); }
"|"                          {return('|'); }
"?"                          {return('?'); }
^"#include ".+               {;}
{digit}+                     {return NUM;}
{alpha}({alpha}|{digit})*    {yylval.var = strdup(yytext);return ID;}
";"                          {return ';'; }
"."                          {return DOT;}
\/\/.*                       {;}
\/\*(.*\n)*.*\*\/            {;}
[ \t\r\n]+                   {;}
.                            {return *yytext;}


%%



int yywrap (void) {return 1;}

static void comment(void)
{
    int c;

    while ((c = input()) != 0)
        if (c == '*')
        {
            while ((c = input()) == '*')
                ;

            if (c == '/')
                return;

            if (c == 0)
                break;
        }
    yyerror("Unterminated comment");


}

Solution

  • Your flex file ignores newlines and your error production skips tokens until it receives a newline. Since the parser will never receive a newline, the error recovery mechanism will discard all the rest of the input.