Search code examples
javaparsingtokenantlr4

Dynamic operator tokens in ANTLR4


I'm trying to make a calculator in ANTLR4 that can use almost every possible symbol as mathematical operator.
Concrete:
- The user defines operations consisting of an operator and a precedence. The operator can be any combination of symbols except for some system symbols (parentheses, commas, ...). Precedence is a positive integer number. Operations are stored in a java HashMap.
- There are three different kinds of operations: left side (unary minus, ...), right side (factorial, ...) and binary (addition, ...)
- The operations should be requested at runtime, so that operations can be (de)activated during the parse. If this is not possible, then the operators should be requested at parser creation.
- For the precedence: full dynamic precedence is preferable(at runtime the precedence of an encountered operation is requested), but if it isn't possible then there should be different precedence presets. (multiplication, addition, ...)

What I've got:
- Working code for operator recognition
- Precedence climbing code which produces a correct parse tree, but gives an error: rule expr failed predicate: (getPrecedence($op) >= $_p)?

UPDATE: fixed operator recognition code, and found code for the precedence climbing mechanism

tokens { PREOP, POSTOP, BINOP, ERROR }
@lexer::members {

    private static List<String> binaryOperators;
    private static List<String> prefixOperators;
    private static List<String> postfixOperators;
    {
        binaryOperators = new ArrayList<String>();
        binaryOperators.add("+");
        binaryOperators.add("*");
        binaryOperators.add("-");
        binaryOperators.add("/");

        prefixOperators = new ArrayList<String>();
        prefixOperators.add("-");

        postfixOperators = new ArrayList<String>();
        postfixOperators.add("!");
    }

    private Deque<Token> deque = new LinkedList<Token>();
    private Token previousToken;
    private Token nextToken;

    @Override
    public Token nextToken() {
        if (!deque.isEmpty()) {
            return previousToken = deque.pollFirst();
        }

        Token next = super.nextToken();
        if (next.getType() != SYMBOL) {
            return previousToken = next;
        }

        StringBuilder builder = new StringBuilder();
        while (next.getType() == SYMBOL) {
            builder.append(next.getText());
            next = super.nextToken();
        }
        deque.addLast(nextToken = next);

        List<Token> tokens = findOperatorCombination(builder.toString(), getOperatorType());
        for (int i = tokens.size() - 1; i >= 0; i--) {
            deque.addFirst(tokens.get(i));
        }
        return deque.pollFirst();       
    }


    private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
        switch (type) {
        case POSTFIX:
            return getPostfixCombination(sequence);
        case PREFIX:
            return getPrefixCombination(sequence);
        case BINARY:
            return getBinaryCombination(sequence);
        default:
            break;
        }
        return null;
    }

    private static List<Token> getPrefixCombination(String sequence) {
        if (isPrefixOperator(sequence)) {
            List<Token> seq = new ArrayList<Token>(1);
            seq.add(0, new CommonToken(MathParser.PREOP, sequence));
            return seq;
        }
        if (sequence.length() <= 1) {
            return null;
        }

        for (int i = 1; i < sequence.length(); i++) {
            List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
            List<Token> seq2 = getPrefixCombination(sequence.substring(i, sequence.length()));
            if (seq1 != null & seq2 != null) {
                seq1.addAll(seq2);
                return seq1;
            }
        }
        return null;
    }

    private static List<Token> getPostfixCombination(String sequence) {
        if (isPostfixOperator(sequence)) {
            List<Token> seq = new ArrayList<Token>(1);
            seq.add(0, new CommonToken(MathParser.POSTOP, sequence));
            return seq;
        }
        if (sequence.length() <= 1) {
            return null;
        }

        for (int i = 1; i < sequence.length(); i++) {
            List<Token> seq1 = getPostfixCombination(sequence.substring(0, i));
            List<Token> seq2 = getPostfixCombination(sequence.substring(i, sequence.length()));
            if (seq1 != null && seq2 != null) {
                seq1.addAll(seq2);
                return seq1;
            }
        }
        return null;
    }


    private static List<Token> getBinaryCombination(String sequence) {
        for (int i = 0; i < sequence.length(); i++) { // i is number of postfix spaces
            for (int j = 0; j < sequence.length() - i; j++) { // j is number of prefix spaces
                String seqPost = sequence.substring(0, i);
                List<Token> post = getPostfixCombination(seqPost);

                String seqPre = sequence.substring(sequence.length()-j, sequence.length());
                List<Token> pre = getPrefixCombination(seqPre);

                String seqBin = sequence.substring(i, sequence.length()-j);

                if ((post != null || seqPost.isEmpty()) && 
                    (pre != null || seqPre.isEmpty()) && 
                    isBinaryOperator(seqBin)) {
                    List<Token> res = new ArrayList<Token>();
                    if (post != null)
                        res.addAll(post);
                    res.add(new CommonToken(MathParser.BINOP, seqBin));
                    if (pre != null)
                        res.addAll(pre);
                    return res;
                }
            }
        }
        return null;
    }


    /**
     * Returns the expected operator type based on the previous and next token
     */
    private OperatorType getOperatorType() {
        if (isValueEnd(previousToken.getType())) {
            if (isValueStart(nextToken.getType())) {
                return OperatorType.BINARY;
            }
            return OperatorType.POSTFIX;
        }
        return OperatorType.PREFIX;
    }
    private enum OperatorType { BINARY, PREFIX, POSTFIX };


    /**
     * Checks whether the given token is a token found at the start of value elements
     * @param tokenType
     * @return
     */
    private static boolean isValueStart(int tokenType) {
        return tokenType == MathParser.INT;

    }
    /**
     * Checks whether the given token is a token found at the end of value elements
     * @param tokenType
     * @return
     */
    private static boolean isValueEnd(int tokenType) {
        return tokenType == MathParser.INT;

    }

    private static boolean isBinaryOperator(String operator) {
        return binaryOperators.contains(operator);
    }
    private static boolean isPrefixOperator(String operator) {
        return prefixOperators.contains(operator);
    }
    private static boolean isPostfixOperator(String operator) {
        return postfixOperators.contains(operator);
    }
}

Precedence climbing code:

@parser::members {
    static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
    static {
        precedenceMap.put("*", 2);
        precedenceMap.put("+", 1);
        precedenceMap.put("^", 4);
        precedenceMap.put("-", 3);
        precedenceMap.put("!", 5);
    }
    public static Integer getPrecedence(Token op) {
        return precedenceMap.get(op.getText());
    }
    public static Integer getNextPrecedence(Token op) {
        Integer p = getPrecedence(op);
        if (op.getType() == PREOP)          return p;
        else if (op.getText().equals("^"))  return p;
        else if (op.getType() == BINOP)     return p+1;
        else if (op.getType() == POSTOP)    return p+1;
        throw new IllegalArgumentException(op.getText());
    }
}

prog
    :   expr[0]
    ;


expr [int _p]
    :   aexpr 
        (   {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)] 
        |   {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
        )* 
    ;

atom
    :   INT 
    |   '(' expr[0] ')'
    |   op=PREOP expr[getNextPrecedence($op)]
    ;

So now the question is what can do about this predicate failure error


Solution

  • Thanks to the other contributors I have found a complete (and actually reasonably clean) solution for my problem.

    Operator matching:
    By looking at the tokens before and after the encountered series of symbols, it is possible to detect the fixity of the operator. After that, apply an algorithm which detects a sequence of valid operators in the symbol series. Then inject those tokens in the token stream (in nextToken() ). Just make sure you define all hardcoded tokens before the SYMBOL definition.

    Precedence climbing:
    Actually this wasn't that hard, it is exactly the same as ANTLR4's internal strategy.

    grammar Math;
    
    
    tokens { PREOP, POSTOP, BINOP, ERROR }
    
    @header {
        import java.util.*;
    }
    
    @lexer::members {
    
        private static List<String> binaryOperators;
        private static List<String> prefixOperators;
        private static List<String> postfixOperators;
        {
            binaryOperators = new ArrayList<String>();
            binaryOperators.add("+");
            binaryOperators.add("*");
            binaryOperators.add("-");
            binaryOperators.add("/");
            System.out.println(binaryOperators);
    
            prefixOperators = new ArrayList<String>();
            prefixOperators.add("-");
            System.out.println(prefixOperators);
    
            postfixOperators = new ArrayList<String>();
            postfixOperators.add("!");
            System.out.println(postfixOperators);
        }
    
        private Deque<Token> deque = new LinkedList<Token>();
    
        private Token previousToken;
        private Token nextToken;
    
        @Override
        public Token nextToken() {
            if (!deque.isEmpty()) {
                return previousToken = deque.pollFirst();
            }
    
            Token next = super.nextToken();
            if (next.getType() != SYMBOL) {
                return previousToken = next;
            }
    
            StringBuilder builder = new StringBuilder();
            while (next.getType() == SYMBOL) {
                builder.append(next.getText());
                next = super.nextToken();
            }
            deque.addLast(nextToken = next);
    
            List<Token> tokens = findOperatorCombination(builder.toString(), getOperatorType());
            for (int i = tokens.size() - 1; i >= 0; i--) {
                deque.addFirst(tokens.get(i));
            }
            return deque.pollFirst();       
        }
    
    
        private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
            switch (type) {
            case POSTFIX:
                return getPostfixCombination(sequence);
            case PREFIX:
                return getPrefixCombination(sequence);
            case BINARY:
                return getBinaryCombination(sequence);
            default:
                break;
            }
            return null;
        }
    
        private static List<Token> getPrefixCombination(String sequence) {
            if (isPrefixOperator(sequence)) {
                List<Token> seq = new ArrayList<Token>(1);
                seq.add(0, new CommonToken(MathParser.PREOP, sequence));
                return seq;
            }
            if (sequence.length() <= 1) {
                return null;
            }
    
            for (int i = 1; i < sequence.length(); i++) {
                List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
                List<Token> seq2 = getPrefixCombination(sequence.substring(i, sequence.length()));
                if (seq1 != null & seq2 != null) {
                    seq1.addAll(seq2);
                    return seq1;
                }
            }
            return null;
        }
    
        private static List<Token> getPostfixCombination(String sequence) {
            if (isPostfixOperator(sequence)) {
                List<Token> seq = new ArrayList<Token>(1);
                seq.add(0, new CommonToken(MathParser.POSTOP, sequence));
                return seq;
            }
            if (sequence.length() <= 1) {
                return null;
            }
    
            for (int i = 1; i < sequence.length(); i++) {
                List<Token> seq1 = getPostfixCombination(sequence.substring(0, i));
                List<Token> seq2 = getPostfixCombination(sequence.substring(i, sequence.length()));
                if (seq1 != null && seq2 != null) {
                    seq1.addAll(seq2);
                    return seq1;
                }
            }
            return null;
        }
    
    
        private static List<Token> getBinaryCombination(String sequence) {
            for (int i = 0; i < sequence.length(); i++) { // i is number of postfix spaces
                for (int j = 0; j < sequence.length() - i; j++) { // j is number of prefix spaces
                    String seqPost = sequence.substring(0, i);
                    List<Token> post = getPostfixCombination(seqPost);
    
                    String seqPre = sequence.substring(sequence.length()-j, sequence.length());
                    List<Token> pre = getPrefixCombination(seqPre);
    
                    String seqBin = sequence.substring(i, sequence.length()-j);
    
                    if ((post != null || seqPost.isEmpty()) && 
                        (pre != null || seqPre.isEmpty()) && 
                        isBinaryOperator(seqBin)) {
                        List<Token> res = new ArrayList<Token>();
                        if (post != null)
                            res.addAll(post);
                        res.add(new CommonToken(MathParser.BINOP, seqBin));
                        if (pre != null)
                            res.addAll(pre);
                        return res;
                    }
                }
            }
            return null;
        }
    
    
        /**
         * Returns the expected operator type based on the previous and next token
         */
        private OperatorType getOperatorType() {
            if (isAfterAtom()) {
                if (isBeforeAtom()) {
                    return OperatorType.BINARY;
                }
                return OperatorType.POSTFIX;
            }
            return OperatorType.PREFIX;
        }
        private enum OperatorType { BINARY, PREFIX, POSTFIX };
    
    
        /**
         * Checks whether the current token is a token found at the start of atom elements
         * @return
         */
        private boolean isBeforeAtom() {
            int tokenType = nextToken.getType();
            return tokenType == MathParser.INT || 
                    tokenType == MathParser.PLEFT;
    
        }
        /**
         * Checks whether the current token is a token found at the end of atom elements
         * @return
         */
        private boolean isAfterAtom() {
            int tokenType = previousToken.getType();
            return tokenType == MathParser.INT ||
                    tokenType == MathParser.PRIGHT;
    
        }
    
        private static boolean isBinaryOperator(String operator) {
            return binaryOperators.contains(operator);
        }
        private static boolean isPrefixOperator(String operator) {
            return prefixOperators.contains(operator);
        }
        private static boolean isPostfixOperator(String operator) {
            return postfixOperators.contains(operator);
        }
    
    }
    
    @parser::members {
        static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
        static {
            precedenceMap.put("*", 2);
            precedenceMap.put("+", 1);
            precedenceMap.put("^", 4);
            precedenceMap.put("-", 3);
            precedenceMap.put("!", 5);
        }
        public static Integer getPrecedence(Token op) {
            return precedenceMap.get(op.getText());
        }
        public static Integer getNextPrecedence(Token op) {
            Integer p = getPrecedence(op);
            if (op.getType() == PREOP)          return p;
            else if (op.getText().equals("^"))  return p;
            else if (op.getType() == BINOP)     return p+1;
            throw new IllegalArgumentException(op.getText());
        }
    }
    
    prog
        :   expr[0]
        ;
    
    
    expr [int _p]
        :   atom
            (   {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)] 
            |   {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
            )* 
        ;
    
    atom
        :   INT 
        |   PLEFT expr[0] PRIGHT
        |   op=PREOP expr[getNextPrecedence($op)]
        ;
    
    INT
        :   ( '0'..'9' )+
        ;
    
    PLEFT   :   '(' ;
    PRIGHT  :   ')' ;
    
    WS
        : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
    
    SYMBOL
        :   .
        ;
    

    Note: code is meant as an example, not as my real code (operators and precedence will be requested externally)