Search code examples
cyacclex

How to make a grammar with assignation (=) and equality (==) operands?


As an assignment of the Compiler Theory course I'm taking, I have to code a calculator in lex/yacc. Some of the requirements are:

  • The user must be able to assign numeric or boolean values to a variable
  • The calculator must be able to compare boolean statements

If I try to input this:

a=5;
a; // Output: 5
a==5; // Syntax error

However, it works if I input this:

5==a; // Output: 1

I think the problem is that yacc is reading a==5 as a new assignment of the form <a> <=> <=5> instead of <a> <==> <5>. Still, I have no idea how to solve the issue. I tried using the %prec annotation but that didn't solve the problem.

Here is the code: calc.l

%{
#include "y.tab.h"
void yyerror(char* s);
%}

%%
"exit"      { return exit_command; }
[0-9]+      { yylval.num = atoi(yytext); return number; }
[a-zA-Z]    { yylval.id = yytext[0]; return identifier; }
[ \t\n]     ;
[-+*/()^%;] { return yytext[0]; }
[<>=!&|]    { return yytext[0]; }
.       { ECHO; yyerror("Unexpected character"); }
%%

int yywrap(void) { return 1; }

calc.y

%{
void yyerror(char *s);
int yylex();

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>

int yydebug = 1;

void print(int num);
int symbols[52];
int symbolVal(char symbol);
void updateSymbolVal(char symbol, int val);
%}

%union { int num; char id; }
%start line
%token exit_command
%token <num> number
%token <id> identifier
%type <num> line bool_exp arit_exp term factor 
%type <id> assignment

%%

line:   line bool_exp ';'   { print($2); }
    | bool_exp ';'      { print($1); }
    | line assignment ';'   {;}
    | assignment ';'    {;}
    | line equality ';' {;}
    | equality ';'      {;}
    | line exit_command { printf("Goodbye\n"); exit(EXIT_SUCCESS); }
    | exit_command      { printf("Goodbye\n"); exit(EXIT_SUCCESS); }
    ;

bool_exp:   arit_exp            { $$ = $1; }
        | bool_exp '<' arit_exp     { $$ = $1 < $3; }
        | bool_exp '>' '=' arit_exp { $$ = $1 >= $4; }
        | bool_exp '>' arit_exp     { $$ = $1 > $3; }
        | bool_exp '<' '=' arit_exp { $$ = $1 <= $4; }
        | bool_exp '=' '=' arit_exp { $$ = $1 == $4; }
        | bool_exp '!' '=' arit_exp { $$ = $1 != $4; }
        | '!' bool_exp          { $$ = !$2; }
        | bool_exp '&' '&' arit_exp { $$ = $1 && $4; }
        | bool_exp '|' '|' arit_exp { $$ = $1 || $4; }
        | '+' '+' identifier        { updateSymbolVal($3, symbolVal($3) + 1); $$ = symbolVal($3); }
        | '-' '-' identifier        { updateSymbolVal($3, symbolVal($3) - 1); $$ = symbolVal($3); }
        ;

arit_exp:   term            { $$ = $1; }
        | arit_exp '+' term { $$ = $1 + $3; }
        | arit_exp '-' term { $$ = $1 - $3; }
        ;

term:   factor          { $$ = $1;  }
        | term '*' factor   { $$ = $1 * $3;  }
        | term '/' factor   { $$ = $1 / $3;  }
    | term '^' factor   { $$ = pow($1, $3); }
    | term '%' factor   { $$ = $1 % $3; }
    | '-' factor        { $$ = $2 * -1; }
    ;

factor: number          { $$ = $1; }
    | identifier        { $$ = symbolVal($1); }
        | '(' bool_exp ')'  { $$ = $2; }
    ;

assignment: identifier '=' bool_exp { updateSymbolVal($1, $3); printf("> %c <- %i\n> ", $1, $3); } ;

%%

void print(int num) {
    printf("> Result: %i\n< ", num);
}

int computeSymbolIndex(char token) {
    int idx = -1;
    if (islower(token)) {
        idx = token - 'a' + 26;
    }
    else if (isupper(token)) {
        idx = token - 'A';
    }
    return idx;
}

/* Returns the value of a given symbol */
int symbolVal(char symbol) {
    int bucket = computeSymbolIndex(symbol);
    return symbols[bucket];
}

void updateSymbolVal(char symbol, int val) {
    int bucket = computeSymbolIndex(symbol);
    symbols[bucket] = val;
}

int main(void) {
    printf("Calculator made with lex and yacc\n");
    printf("Usage: <Arithmetic or boolean expression> <semicolon>\n");
    printf("e.g: 2+2;\n\n< ");
    return yyparse();
}

void yyerror(char *s) { fprintf(stderr, "Error: %s\n", s); }

Solution

  • The operators should be tokens. So you'd get a Lex scanner with:

    "=="   { return EQUAL; }
    

    and so on. It's wrong to form tokens in the parser like you do now with '=' '='.

    In the Yacc file you can define the associativity of the operators using %left TOKEN and %right TOKEN. The order in which you place them determines their precedence (see this post or a yacc manual).