Search code examples
c++assemblycompiler-constructionyacclex

Simple lex/yacc converter from pseudo C to pseudo assembly language


I had to repost this question in order to provide an answer.

I would like to make some kind of translator from mini C to NASM (assembly) using YACC/LEX (C++). The program should receive a piece of code and return a file containing that code translated to NASM assembly.

mini C should support code like this one

a = v1;
b = v2;
while(condition){
    // Some expressions +
}
if(Condition){ //
// Some expressions + 
} else {
// Some expressions +
}

It should recognize positive int values, only simple expressions (+), if/else, and while loops with conditions {<, >, ==}. I know how to do the lexical analysis part using flex, but the parser is troublesome for me, especially parsing the while loop and if/else.

Those if/else and while parts can have begin/end instead of {}

I guess grammar should be like

/* Draft grammar: a program is a single block followed by '.'. */
Program: Block '.'
;

/* A block is a list of lines enclosed by the begin/end tokens. */
Block : BEGIN_TOKEN Lines END_TOKEN
;

/* One or more lines separated by ';'. */
Lines: Lines ';' Line
| Line
;

/* A declaration with a numeric initializer, an if/else, a while loop, or a bare expression. */
Line: INT_TOKEN ID_TOKEN '=' NUM_TOKEN
| IF_TOKEN '(' Condition ')' Block ELSE_TOKEN Block
| WHILE_TOKEN '(' Condition ')' Block
| E
;

/* Expressions: sums of identifiers and numbers. The '+' literal must be quoted. */
E: E '+' E
| ID_TOKEN
| NUM_TOKEN
;

/* Conditions compare two expressions with <, > or ==. */
Condition: E '<' E
| E '>' E
| E EQ_TOKEN E
;

Conditions are simple ones like a<5, b>c, d==3... It doesn't need any kind of prints or scans. Actions should be written in a separate C++ file.


Solution

  • Because my last question was closed because someone thought that I was writing a book, I would like to share my solution to this problem. This problem was my homework.

    lexer.l

    %option noyywrap
    %option nounput
    %option noinput
    
    %{
        // Scanner for the mini-C translator. It returns the token codes
        // declared in parser.tab.hpp. Identifier and number tokens carry
        // their spelling in yylval.s as a string allocated with new; the
        // scanner itself never frees it.
        
        #include <iostream>
        #include <cstdlib>
        #include <string>
        
        using namespace std;
        
        #include "parser.tab.hpp"
        
    %}
    
    %%
    
    while           {return WHILE_TOKEN;}
    if              {return IF_TOKEN;}
    else            {return ELSE_TOKEN;}
    print           {return PRINT_TOKEN;}
    "=="            {return EQ_TOKEN;}
    "<="            {return LEQ_TOKEN;}
    ">="            {return GEQ_TOKEN;}
    [-+(){}<>=;]    {/* single-character tokens are returned as their own character code */ return *yytext;}
    [a-z]           {/* identifiers are one lowercase letter */ yylval.s = new string(yytext); return ID_TOKEN;}
    (0|[1-9][0-9]*) {/* non-negative integers without leading zeros */ yylval.s = new string(yytext); return NUM_TOKEN;}
    [ \t\n]         {/* whitespace is skipped */}
    .               {/* anything else is a fatal lexical error */ cerr<<"Lex error:  "<< *yytext << endl; exit(EXIT_FAILURE);}
    %%
    

    parser.ypp

    %{
    
    #include <cstdlib>
    #include <fstream>
    #include <iostream>
    #include <ostream>
    #include <string>
    
    
    using namespace std;
    
    // Error reporter used by the parser: writes the message to stderr and
    // terminates the process with a failure status, so it never returns.
    // The message is taken by const reference; string literals and C strings
    // still convert implicitly, and no extra copy of the string is made.
    void yyerror(const std::string &s){
        std::cerr << s << std::endl;
        std::exit(EXIT_FAILURE);
    }
    
    // Scanner entry point; it is defined outside this file.
    extern int yylex();
    
    // Stream that the grammar actions write the generated code to; opened in main().
    ofstream out;
    
    // Next unused label number; the while/if/else actions take their labels from here.
    int next_label = 0;
    
    %}
    
    /* Semantic value of a symbol: either a pointer to its spelling or a label number. */
    %union{
        string *s;
        int v;
    }
    
    /* IF_THEN is used only as a %prec tag on the empty Else alternative.
       ELSE_TOKEN is declared after it and so ranks higher, which resolves
       the dangling-else conflict in favor of shifting the else. */
    %nonassoc IF_THEN
    %nonassoc ELSE_TOKEN
    
    /* Operator precedence, lowest first. */
    %right '='
    %left '<' '>' EQ_TOKEN LEQ_TOKEN GEQ_TOKEN
    %left '+' '-'
    
    /* Identifiers and numbers carry their text (s); the control-flow keywords
       carry a label number (v) that the grammar actions assign. */
    %type<s> ID_TOKEN NUM_TOKEN
    %type<v> WHILE_TOKEN IF_TOKEN ELSE_TOKEN
    %token PRINT_TOKEN EQ_TOKEN LEQ_TOKEN GEQ_TOKEN WHILE_TOKEN ELSE_TOKEN IF_TOKEN ID_TOKEN NUM_TOKEN
    
    %%
    
    /* Start symbol: the whole input is a statement list. Once it has been
       parsed, one empty line is written to the output and the stream is flushed. */
    Program: Statements {out << "" << endl;}
    ;
    
    /* Zero or more statements; left-recursive, and the second alternative is empty. */
    Statements: Statements Statement {}
    |
    ;
    
    /*
     * A single statement: empty statement, expression statement, braced
     * block, assignment, print, while loop, or if with an optional else.
     *
     * Labels written for the control-flow forms:
     *   while: "L<n>:" before the condition and "L<n+1>:" after it
     *          (two label numbers are reserved per loop)
     *   if:    "L<n>:" after the condition
     *
     * NOTE(review): only labels are written for while/if; no conditional or
     * backward jump is emitted here, so the output does not yet encode the
     * loop or the branch -- confirm the intended output format.
     * NOTE(review): the assignment writes "assign_val" without the name of
     * the assigned variable -- confirm this is intended.
     */
    Statement: ';'
    | E ';'
    | '{' Statements '}'
    | ID_TOKEN '=' E ';' {out << "assign_val" << endl; delete $1; /* the name string was allocated by the scanner and is not used afterwards */}
    | PRINT_TOKEN '(' E ')' {out << "print" << endl;}
    | WHILE_TOKEN {$1 = next_label; out << "L" << $1 << ":" << endl; next_label+=2;} '(' E ')' {out << "L" << $1 +1 << ":" << endl;} Statement
    | IF_TOKEN '(' E ')' {$1 = next_label++; out << "L" << $1 << ":" << endl;} Statement Else
    ;
    
    /*
     * Optional else branch. With an else, a jump to a fresh label is written
     * before the else body and the label itself after the body. The jump
     * operand is the bare label name; the colon belongs only to the label
     * definition. The empty alternative carries the IF_THEN precedence.
     */
    Else: ELSE_TOKEN {$1 = next_label++; out << "jmp L" << $1 << endl;} Statement {out << "L" << $1 << ":" << endl;}
    | %prec IF_THEN {}
    ;
    
    /*
     * Expressions. The output is in postfix order: each operand is written
     * when it is reduced, and the operator mnemonic is written after both of
     * its operands. Operand strings come from the scanner (allocated with
     * new) and are released as soon as they have been written.
     */
    E: E '+' E {out << "add" << endl;}
    | E '-' E {out << "sub" << endl;}
    | E '<' E {out << "compL" << endl;}
    | E '>' E {out << "compG" << endl;}
    | E EQ_TOKEN E {out << "compEQ" << endl;}
    | E LEQ_TOKEN E {out << "compLEQ" << endl;}
    | E GEQ_TOKEN E {out << "compGEQ" << endl;}
    | '(' E ')'
    | ID_TOKEN {out << *$1 << endl; delete $1;}
    | NUM_TOKEN {out << *$1 << endl; delete $1;}
    ;
    
    %%
    
    int main(){
        out.open("output.s", ios_base::app);
        yyparse();
        return 0;
    }
    

    makefile

    # Link the translator from the parser and scanner objects.
    program: parser.tab.o lex.yy.o
        g++ -Wall -o $@ $^
    # The parser is compiled as C++.
    parser.tab.o: parser.tab.cpp parser.tab.hpp
        g++ -Wall -c -o $@ $<
    # The scanner is generated as lex.yy.c but contains C++ code, so it is
    # compiled with g++ as well. The target name must be lex.yy.o to match
    # the prerequisite of `program`.
    lex.yy.o: lex.yy.c
        g++ -Wall -c -o $@ $<
    # flex generates the scanner; it needs the token definitions from bison.
    lex.yy.c: lexer.l parser.tab.hpp
        flex $<
    # bison generates the parser source and the token header (-d) plus a report (-v).
    parser.tab.cpp parser.tab.hpp: parser.ypp
        bison -d -v $<
    

    Locate the directory with the source files in a terminal and just run the make command.