Search code examples
parsingpascalyacclexparser-generator

can't find a simple error when parsing with YACC


I'm trying to write a very simple YACC parser for a subset of the Pascal language that includes only integer declarations, some basic expressions and if-else statements. However, I have been unable to find the error for hours. The terminal says "Error at line: 0", which should be impossible. I use flex and byacc to build the parser. I would be very glad if you could help. This is my lex file:

%{
/*
 * Scanner for the Pascal subset. Each rule below returns one of the token
 * codes named in y.tab.h (presumably the header generated from the yacc
 * grammar -- confirm against the build).
 *
 * Keywords if/then/else/for/while are matched in lower case only, and
 * PROGRAM/BEGIN/VAR/END/INTEGER in upper case only. The keyword rules are
 * listed before the identifier rule, so an equal-length match resolves to
 * the keyword.
 *
 * The newline rule only counts lines and returns nothing. The final `.`
 * rule returns any character not matched by an earlier rule as its own
 * character code; no rule discards blanks or tabs, so those are returned
 * to the parser as well.
 */
#include <stdio.h>
#include <string.h>
#include "y.tab.h"
extern int yylval;
/* Line counter: starts at 0 and is bumped once per newline seen. */
int linenum=0;
%}

digit   [0-9]
letter  [A-Za-z]

%%
if              return IF;
then                return THEN;
else                return ELSE;
for             return FOR;
while               return WHILE;
PROGRAM             return PROGRAM_SYM;
BEGIN               return BEGIN_SYM;
VAR             return VAR_SYM;
END             return END_SYM;
INTEGER             return INTEGER_SYM;
{letter}({letter}|{digit})* return identifier;
[0-9]+              return NUMBER;
[\<][\=]            return CON_LE;
[\>][\=]            return CON_GE;
[\=]                return CON_EQ;          
[\:][\=]            return ASSIGNOP;
;               return semiColon;
,               return comma;
\n              {linenum++;}
.               return (int) yytext[0];
%%

and this is my Yacc file

%{
#include <stdio.h>
#include <string.h>
#include "y.tab.h"
extern FILE *yyin;
extern int linenum;
%}

/* Token codes returned by the scanner. */
%token PROGRAM_SYM VAR_SYM BEGIN_SYM END_SYM INTEGER_SYM NUMBER
%token identifier INTEGER ASSIGNOP semiColon comma THEN
%token IF ELSE FOR WHILE
%token CON_EQ CON_LE CON_GE GE LE

/*
 * Precedence is listed from lowest to highest: operators on a later line
 * bind tighter, so '*' and '/' must be declared after '+' and '-'.
 */
%left '+' '-'
%left '*' '/'

%start program

%%

/* Whole program: header line, VAR section, then BEGIN ... END followed by '.'. */
program: PROGRAM_SYM identifier semiColon VAR_SYM dec_block BEGIN_SYM statement_list END_SYM '.'
     ;

/* Declaration section: a dec_list followed by exactly one semicolon. */
dec_block:
        dec_list semiColon;

/* One or more declarations placed back to back (no separator between them). */
dec_list:
        dec_list dec
        |
        dec
        ;

/* A declaration is an int_dec_list; no semicolon is consumed here. */
dec: 
        int_dec_list
        ;

/* One or more "names ':' type" groups, again with no separator between groups. */
int_dec_list:   
        int_dec_list int_dec ':' type
        |
        int_dec ':' type
        ;

/* Comma-separated list of one or more identifiers. */
int_dec:
        int_dec comma identifier
        |
        identifier
        ;

/* The only type accepted is INTEGER_SYM. */
type:
    INTEGER_SYM
    ;

/* One or more statements. */
statement_list:
        statement_list statement
        |
        statement
        ;

/* A statement is itself a run of assignments, expressions or selections. */
statement:
        assignment_list
        |
        expression_list
        |
        selection_list
        ;

/* One or more assignments. */
assignment_list:
        assignment_list assignment
        |
        assignment      
        ;

/*
 * The right-hand side is an expression_list, so the terminating semicolon
 * comes from that rule rather than from this one.
 */
assignment:

        identifier ASSIGNOP expression_list
        ;

/* One or more expressions, each followed by a semicolon. */
expression_list:
        expression_list expression semiColon
        |
        expression semiColon
        ;


/* Arithmetic expression: parenthesised, binary '*', '/', '+', '-', or a factor. */
expression:
        '(' expression ')'
        |
        expression '*' expression
        |
        expression '/' expression
        |
        expression '+' expression
        |
        expression '-' expression
        |
        factor
        ;

/* Operand: an identifier or a number. */
factor:     
        identifier
        |
        NUMBER
        ;


/* One or more selections. */
selection_list:
        selection_list selection
        |
        selection
        ;

/* IF with a parenthesised condition; the ELSE branch is mandatory in this rule. */
selection:
        IF '(' logical_expression ')' THEN statement_list ELSE statement_list
        ;


/*
 * NOTE(review): every alternative starts with logical_expression itself;
 * there is no non-recursive alternative to begin the recursion.
 */
logical_expression:
        logical_expression '=' expression
        |
        logical_expression '>' expression
        |
        logical_expression '<' expression
        ;


%%
void yyerror(char *s){
    fprintf(stderr,"Error at line: %d\n",linenum);
}
/* End-of-input hook for the scanner: always returns 1 (no further input). */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/*
 * Entry point: parses the file named by the first command-line argument.
 *
 * Returns 0 once the parser has run, or 1 when no file name was given or
 * the file could not be opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n",
                argc > 0 ? argv[0] : "parser");
        return 1;
    }

    /* Point the scanner at the input file before starting the parser. */
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    yyparse();
    fclose(yyin);
    return 0;
}

Finally, I get an error on the first line when I give it this input:

PROGRAM myprogram;

VAR

i:INTEGER;

i3:INTEGER;

j:INTEGER;

BEGIN

i := 3;

j := 5;

i3 := i+j*2;

i := j*20;

if(i>j)

then i3 := i+50+(45*i+(40*j));

else i3 := i+50+(45*i+(40*j))+i+50+(45*i+(30*j));

END.

Solution

  • Your lexical analyzer returns blanks and tabs as tokens, but the grammar doesn't recognize them.

    Add a lexer (flex) rule that discards them:

    [ \t\r]    { }
    

    This gets you to line 6 instead of line 0 before you run into an error. You get that error because you don't allow semicolons between declarations:

    dec_block:
            dec_list semiColon;
    
    dec_list:
            dec_list dec
            |
            dec
            ;
    
    dec:
            int_dec_list
            ;
    

    That should probably be:

    dec_block:
            dec_block dec
            |
            dec
            ;
    
    dec:
            int_dec_list semiColon
            ;
    

    Doing this gets you to line 14 in the input.

    Incidentally, one of the first things I did was to ensure that the lexical analyzer tells me what it is doing, by modifying the rules like this:

    if              { printf("IF\n"); return IF; }
    

    In long term code, I'd make that diagnostic output selectable at run-time.


    You have a general problem with where you expect semicolons. It is also not clear that you should allow an expression_list in the rule for statement (or, maybe, 'not yet' — that might be appropriate when you have function calls, but allowing 3 + 2 / 4 as a 'statement' is not very helpful).


    This grammar gets to the end of the input:

    %{
    #include <stdio.h>
    #include <string.h>
    #include "y.tab.h"
    extern FILE *yyin;
    extern int linenum;
    %}
    
    /* Token codes returned by the scanner. */
    %token PROGRAM_SYM VAR_SYM BEGIN_SYM END_SYM INTEGER_SYM NUMBER
    %token identifier INTEGER ASSIGNOP semiColon comma THEN
    %token IF ELSE FOR WHILE
    %token CON_EQ CON_LE CON_GE GE LE
    
    /*
     * Precedence is listed from lowest to highest: operators on a later line
     * bind tighter, so '*' and '/' must be declared after '+' and '-'.
     */
    %left '+' '-'
    %left '*' '/'
    
    %start program
    
    %%
    
    /* Whole program: header line, VAR section, then BEGIN ... END followed by '.'. */
    program: PROGRAM_SYM identifier semiColon VAR_SYM dec_block BEGIN_SYM statement_list END_SYM '.'
         ;
    
    /* One or more declarations. */
    dec_block:
            dec_block dec
            |
            dec
            ;
    
    /* Each declaration carries its own terminating semicolon. */
    dec:
            int_dec_list semiColon
            ;
    
    /* One or more "names ':' type" groups. */
    int_dec_list:
            int_dec_list int_dec ':' type
            |
            int_dec ':' type
            ;
    
    /* Comma-separated list of one or more identifiers. */
    int_dec:
            int_dec comma identifier
            |
            identifier
            ;
    
    /* The only type accepted is INTEGER_SYM. */
    type:
        INTEGER_SYM
        ;
    
    /* One or more statements. */
    statement_list:
            statement_list statement
            |
            statement
            ;
    
    /* A statement is a single assignment or a single selection. */
    statement:
            assignment
            |
            selection
            ;
    
    /* identifier := expression, terminated by a semicolon. */
    assignment:
            identifier ASSIGNOP expression semiColon
            ;
    
    /* Arithmetic expression: parenthesised, binary '*', '/', '+', '-', or a factor. */
    expression:
            '(' expression ')'
            |
            expression '*' expression
            |
            expression '/' expression
            |
            expression '+' expression
            |
            expression '-' expression
            |
            factor
            ;
    
    /* Operand: an identifier or a number. */
    factor:
            identifier
            |
            NUMBER
            ;
    
    /* IF with a parenthesised condition; the ELSE branch is mandatory in this rule. */
    selection:
            IF '(' logical_expression ')' THEN statement_list ELSE statement_list
            ;
    
    /*
     * Comparison of two arithmetic expressions. The scanner returns the named
     * tokens CON_EQ, CON_LE and CON_GE for "=", "<=" and ">=", so those are
     * matched by name; '>' and '<' have no rule of their own in the scanner
     * and arrive as plain character tokens.
     */
    logical_expression:
            expression CON_EQ expression
            |
            expression CON_LE expression
            |
            expression CON_GE expression
            |
            expression '>' expression
            |
            expression '<' expression
            ;
    
    %%
    void yyerror(char *s){
        fprintf(stderr,"Error at line: %d\n",linenum);
    }
    /* End-of-input hook for the scanner: always returns 1 (no further input). */
    int yywrap()
    {
        enum { NO_MORE_INPUT = 1 };

        return NO_MORE_INPUT;
    }
    /*
     * Entry point: parses the file named by the first command-line argument.
     *
     * Returns 0 once the parser has run, or 1 when no file name was given or
     * the file could not be opened.
     */
    int main(int argc, char *argv[])
    {
        if (argc < 2) {
            fprintf(stderr, "usage: %s <source-file>\n",
                    argc > 0 ? argv[0] : "parser");
            return 1;
        }

        /* Point the scanner at the input file before starting the parser. */
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            perror(argv[1]);
            return 1;
        }

        yyparse();
        fclose(yyin);
        return 0;
    }
    

    Key changes include removing assignment_list and expression_list, and modifying logical_expression so that the two sides of the expansion are expression, rather than the LHS being logical_expression (which then never had a primitive definition, leading to the problems with warnings).

    There are still issues to resolve; the statement_list in the selection rule should be more restrictive to accurately reflect the grammar of Pascal. (You need a block there that can be either a single statement or a BEGIN, statement list, END sequence.)