Search code examples
whitespacebisonflex-lexeryacclex

YACC+FLEX. ignore whitespace doesn't work well


My problem is with whitespace in the input line. I'm working with Lex and Yacc, and I have the following files:

interpret.l

%option noyywrap
%{
#include "interpret.tab.h"
%}

%x string
%x substring
%%

[\t ]+      /* ignore whitespace */ ;
"+"         { return SUM; }
"-"         { return SUB; }
"*"         { return MUL; }
"/"         { return DIV; }

"=="        { return EQ; }
">"         { return GT; }
"<"         { return LT; }

":="        {return IS;}

"("         { return LPAR; }
")"         { return RPAR; }

"if"        { return IF; }
"else"      { return ELSE; }
"then"      { return THEN; }

"print"     { return PRINT; }

[a-z A-Z]+  { return ID; }
[0-9]+      { yylval.i = atoi( yytext ); return INT; }

[\n]        { return EOLN; }

.           { printf("Illegal character %c: ", *yytext); }

interpret.y

%error-verbose
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

char* int_to_string();
char* reformat(char* sub);
int yyerror();
int yylex();
%}

%union {
  int b;
  int i;
}

%type <i> expr INT inst
%type <b> expr_booleana

%token SUM SUB MUL DIV
%token EQ GT LT IS
%token IF ELSE THEN
%token ID
%token INT
%token LPAR RPAR
%token EOLN
%token PRINT

%%

expr_lst : 
    | expr_lst inst EOLN 
    ;


inst : IF expr_booleana THEN {if($2){printf("true\n");}}
     | PRINT expr   {printf("print expr %i\n", $2);}
     ;

expr_booleana: expr EQ expr { $$ = $1==$3; }
             | expr GT expr { $$ = $1>$3; }
             | expr LT expr { $$ = $1<$3;}
             | LPAR expr_booleana RPAR { $$ = $2; }
             ;

expr : INT { $$ = $1; }
      | expr SUM expr   { $$ = $1 + $3; }
      | expr SUB expr   { $$ = $1 - $3; }
      | expr MUL expr   { $$ = $1 * $3; }
      | expr DIV expr   { $$ = $1 / $3; }
      | LPAR expr RPAR  { $$ = $2; }
      ;



%%


/*
 * Error-reporting function declared in the grammar prologue.
 *
 * Writes the message `m` followed by a newline to stderr.
 *
 * Returns 0 always. The function is declared to return int, so it must
 * return a value explicitly; falling off the end would leave the result
 * indeterminate for any caller that reads it.
 */
int yyerror( char* m ) {
   fprintf( stderr, "%s\n", m );
   return 0;
}

/* Program entry point: runs yyparse() and uses its result as the exit status. */
int main() {
  int status = yyparse();

  return status;
}

The YACC program isn't finished yet. When I compile, it doesn't show any warnings.

My problem is with inputs like "if(4>2) then". Console shows the message "syntax error, unexpected ID, expecting THEN".

If I don't write the space, everything works. I don't understand why, because the line "[\t ]+ /* ignore whitespace */ ;" in interpret.l is there precisely to ignore whitespace...

Can you help me?

Thank you in advance.


Solution

  • [a-z A-Z]+  { return ID; }
    

    says that an ID can be lower case letters, spaces, or upper case letters. Thus " then" (with leading space) is an ID. (It takes precedence over your whitespace pattern because the match is longer.)

    Also think about whether digits shouldn't be ok, as in times2. A common id pattern is

    [[:alpha:]_][[:alnum:]_]*