Search code examples
javaparsingcalculatorrecursive-descent

Recursive descent parser from BNF grammar in java


I need to make a recursive descent parser that follows the grammar

<program> → <statements>
<statements>→ <statement> | <statement><semi_colon><statements>
<statement> → <ident><assignment_op><expression>
<expression> → <term><term_tail>
<term_tail> → <add_op><term><term_tail> | ε
<term> → <factor> <factor_tail>
<factor_tail> → <mult_op><factor><factor_tail> | ε
<factor> → <left_paren><expression><right_paren> | <ident> | <const>
<const> → any decimal numbers
<ident> → any names conforming to C identifier rules
<assignment_op> → :=
<semi_colon> → ;
<add_operator> → + | -
<mult_operator> → * | /
<left_paren> → (
<right_paren> →)

I have made a lexer that tokenizes input into token type, token string, and token value.

public Token(int type, String token_string, Object value) {
        this.type = type;
        this.token_string = token_string;
        this.value = value;
    }
public class TokenType {
    public static final int NUMBER=1;
    public static final int LEFT_PAR=2;
    public static final int RIGHT_PAR=3;
    public static final int PLUS=4;
    public static final int MINUS=5;
    public static final int STAR=6;
    public static final int SLASH=7;
    public static final int IDENT=8;
    public static final int ASSIGN=9;
    public static final int SEMI_COLON=10;
    
}

I am supposed to parse inputs like

operator1 := 200 + 100 *(100 - 200);

operator2 := operator1 + 300 and return the operator's values

However, I am not sure how I could make the parser.


Solution

  • You add END_OF_TOKEN to TokenType.

    public class TokenType {
        public static final int NUMBER = 1;
        .....
        public static final int END_OF_TOKEN = 11;
    }
    

    You define a Tokenizer class. It returns a Token object by calling get(). For simplicity here it returns the Token passed to the constructor.

    public class Tokenizer {
        Token[] tokens;
        int index = 0;
    
        Tokenizer(Token... tokens) {
            this.tokens = tokens;
        }
    
        public Token get() {
            if (index < tokens.length)
                return tokens[index++];
            else
                return new Token(TokenType.END_OF_TOKEN, "End of token", null);
        }
    }
    

    You define a Parser.

    public class Parser {
        Tokenizer tokenizer;
        Token token;
        Map<String, Double> variables = new HashMap<>();
    
        Parser(Tokenizer tokenizer) {
            this.tokenizer = tokenizer;
        }
    
        double factor() {
            if (token.type == TokenType.LEFT_PAR) {
                token = tokenizer.get();
                double e = expression();
                if (token.type != TokenType.RIGHT_PAR)
                    throw new RuntimeException("')' expected");
                token = tokenizer.get();
                return e;
            } else if (token.type == TokenType.IDENT) {
                String name = token.token_string;
                token = tokenizer.get();
                if (!variables.containsKey(name))
                    throw new RuntimeException("variable '" + name + "' undefined");
                return variables.get(name);
            } else if (token.type == TokenType.NUMBER) {
                double value = Double.parseDouble(token.token_string);
                token = tokenizer.get();
                return value;
            } else
                throw new RuntimeException("unknown token '" + token.token_string + "'");
        }
    
        double term() {
            double value = factor();
            while (true) {
                if (token.type == TokenType.STAR) {
                    token = tokenizer.get();
                    value *= factor();
                } else if (token.type == TokenType.SLASH) {
                    token = tokenizer.get();
                    value /= factor();
                } else
                    break;
            }
            return value;
        }
    
        double expression() {
            double value = term();
            while (true) {
                if (token.type == TokenType.PLUS) {
                    token = tokenizer.get();
                    value += term();
                } else if (token.type == TokenType.MINUS) {
                    token = tokenizer.get();
                    value -= term();
                } else
                    break;
            }
            return value;
        }
    
        void statement() {
            if (token.type != TokenType.IDENT)
                throw new RuntimeException("identifier expected");
            String name = token.token_string;
            token = tokenizer.get();
            if (token.type != TokenType.ASSIGN)
                throw new RuntimeException("':=' expected");
            token = tokenizer.get();
            double value = expression();
            variables.put(name, value);
        }
    
        void statements() {
            while (true) {
                statement();
                if (token.type != TokenType.SEMI_COLON)
                    break;
                token = tokenizer.get();
            }
        }
    
        public Map<String, Double> parse() {
            token = tokenizer.get();
            statements();
            if (token.type != TokenType.END_OF_TOKEN)
                throw new RuntimeException("extra token '" + token.token_string + "'");
            return variables;
        }
    }
    

    Test:

    @Test
    public void testParser() {
        Tokenizer tokenizer = new Tokenizer(
            new Token(TokenType.IDENT, "operator1", null),
            new Token(TokenType.ASSIGN, null, null),
            new Token(TokenType.NUMBER, "200", null),
            new Token(TokenType.PLUS, null, null),
            new Token(TokenType.NUMBER, "100", null),
            new Token(TokenType.STAR, null, null),
            new Token(TokenType.LEFT_PAR, null, null),
            new Token(TokenType.NUMBER, "100", null),
            new Token(TokenType.MINUS, null, null),
            new Token(TokenType.NUMBER, "200", null),
            new Token(TokenType.RIGHT_PAR, null, null),
            new Token(TokenType.SEMI_COLON, null, null),
            new Token(TokenType.IDENT, "operator2", null),
            new Token(TokenType.ASSIGN, null, null),
            new Token(TokenType.IDENT, "operator1", null),
            new Token(TokenType.PLUS, null, null),
            new Token(TokenType.NUMBER, "300", null));
        Parser parser = new Parser(tokenizer);
        Map<String, Double> variables = parser.parse();
        System.out.println(variables);
    }
    

    output:

    {operator2=-9500.0, operator1=-9800.0}