Search code examples
javaparsingantlrantlr4antlr3

Nested Boolean Expression Parser using ANTLR


I'm trying to parse a Nested Boolean Expression and get the individual conditions within the expression separately. For e.g., if the input string is:

(A = a OR B = b OR C = c AND ((D = d AND E = e) OR (F = f AND G = g)))

I would like to get the conditions with the correct order. i.e.,

D =d AND E = e OR F = f AND G = g AND A = a OR B = b OR C = c

I'm using ANTLR 4 to parse the input text and here's my grammar:

grammar SimpleBoolean;

rule_set : nestedCondition* EOF;

AND : 'AND' ;
OR  : 'OR' ;
NOT : 'NOT';

TRUE  : 'TRUE' ;
FALSE : 'FALSE' ;

GT : '>' ;
GE : '>=' ;
LT : '<' ;
LE : '<=' ;
EQ : '=' ;

LPAREN : '(' ;
RPAREN : ')' ;

DECIMAL : '-'?[0-9]+('.'[0-9]+)? ;

IDENTIFIER : [a-zA-Z_][a-zA-Z_0-9]* ;

WS : [ \r\t\u000C\n]+ -> skip;

nestedCondition : LPAREN condition+ RPAREN (binary nestedCondition)*;
condition: predicate (binary predicate)*
            | predicate (binary component)*;
component: predicate | multiAttrComp;
multiAttrComp : LPAREN predicate (and predicate)+ RPAREN;
predicate : IDENTIFIER comparator IDENTIFIER;
comparator : GT | GE | LT | LE | EQ ;
binary: AND | OR ;
unary: NOT;
and: AND;

And here's the Java Code that I'm using to parse it:

ANTLRInputStream inputStr = new ANTLRInputStream(input);
SimpleBooleanLexer lexer = new SimpleBooleanLexer(inputStr);
TokenStream tokens = new CommonTokenStream(lexer);
SimpleBooleanParser parser = new SimpleBooleanParser(tokens);
parser.getBuildParseTree();
ParseTree tree = parser.rule_set();
System.out.println(tree.toStringTree(parser));

The output is:

(rule_set (nestedCondition ( (condition (predicate A (comparator =) a) (binary OR) (component (predicate B (comparator =) b)) (binary OR) (component (predicate C (comparator =) c)) (binary AND) (component (multiAttrComp ( (predicate ( D (comparator =) d) (and AND) (predicate E (comparator =) e) ))) (binary OR) (component (multiAttrComp ( (predicate F (comparator =) f) (and AND) (predicate G (comparator =) g) )))) ) )) <EOF>)

I'm looking for help on how to parse this tree to get the conditions in the correct order? In ANTLR 3, we could specify ^ and ! to decide how the tree is built(refer this thread), but I learnt that this is not supported in ANTLR 4.

Can someone suggest how I can parse the String in the correct order in Java using the ParseTree created by ANTLR?


Solution

  • I'd just wrap all the expressions into a single expression rule. Be sure to define the comparator expressions alternative before your binary expression alternative to make sure comparator operators bind more tightly than OR and AND:

    grammar SimpleBoolean;
    
    parse
     : expression EOF
     ;
    
    expression
     : LPAREN expression RPAREN                       #parenExpression
     | NOT expression                                 #notExpression
     | left=expression op=comparator right=expression #comparatorExpression
     | left=expression op=binary right=expression     #binaryExpression
     | bool                                           #boolExpression
     | IDENTIFIER                                     #identifierExpression
     | DECIMAL                                        #decimalExpression
     ;
    
    comparator
     : GT | GE | LT | LE | EQ
     ;
    
    binary
     : AND | OR
     ;
    
    bool
     : TRUE | FALSE
     ;
    
    AND        : 'AND' ;
    OR         : 'OR' ;
    NOT        : 'NOT';
    TRUE       : 'TRUE' ;
    FALSE      : 'FALSE' ;
    GT         : '>' ;
    GE         : '>=' ;
    LT         : '<' ;
    LE         : '<=' ;
    EQ         : '=' ;
    LPAREN     : '(' ;
    RPAREN     : ')' ;
    DECIMAL    : '-'? [0-9]+ ( '.' [0-9]+ )? ;
    IDENTIFIER : [a-zA-Z_] [a-zA-Z_0-9]* ;
    WS         : [ \r\t\u000C\n]+ -> skip;
    

    To test the grammar above, use the following quick-and-dirty test classes:

    public class Main {
    
      public static void main(String[] args) throws Exception {
    
        Map<String, Object> variables = new HashMap<String, Object>() {{
          put("A", true);
          put("a", true);
          put("B", false);
          put("b", false);
          put("C", 42.0);
          put("c", 42.0);
          put("D", -999.0);
          put("d", -1999.0);
          put("E", 42.001);
          put("e", 142.001);
          put("F", 42.001);
          put("f", 42.001);
          put("G", -1.0);
          put("g", -1.0);
        }};
    
        String[] expressions = {
            "1 > 2",
            "1 >= 1.0",
            "TRUE = FALSE",
            "FALSE = FALSE",
            "A OR B",
            "B",
            "A = B",
            "c = C",
            "E > D",
            "B OR (c = B OR (A = A AND c = C AND E > D))",
            "(A = a OR B = b OR C = c AND ((D = d AND E = e) OR (F = f AND G = g)))"
        };
    
        for (String expression : expressions) {
          SimpleBooleanLexer lexer = new SimpleBooleanLexer(new ANTLRInputStream(expression));
          SimpleBooleanParser parser = new SimpleBooleanParser(new CommonTokenStream(lexer));
          Object result = new EvalVisitor(variables).visit(parser.parse());
          System.out.printf("%-70s -> %s\n", expression, result);
        }
      }
    }
    
    class EvalVisitor extends SimpleBooleanBaseVisitor<Object> {
    
      private final Map<String, Object> variables;
    
      public EvalVisitor(Map<String, Object> variables) {
        this.variables = variables;
      }
    
      @Override
      public Object visitParse(SimpleBooleanParser.ParseContext ctx) {
        return super.visit(ctx.expression());
      }
    
      @Override
      public Object visitDecimalExpression(SimpleBooleanParser.DecimalExpressionContext ctx) {
        return Double.valueOf(ctx.DECIMAL().getText());
      }
    
      @Override
      public Object visitIdentifierExpression(SimpleBooleanParser.IdentifierExpressionContext ctx) {
        return variables.get(ctx.IDENTIFIER().getText());
      }
    
      @Override
      public Object visitNotExpression(SimpleBooleanParser.NotExpressionContext ctx) {
        return !((Boolean)this.visit(ctx.expression()));
      }
    
      @Override
      public Object visitParenExpression(SimpleBooleanParser.ParenExpressionContext ctx) {
        return super.visit(ctx.expression());
      }
    
      @Override
      public Object visitComparatorExpression(SimpleBooleanParser.ComparatorExpressionContext ctx) {
        if (ctx.op.EQ() != null) {
          return this.visit(ctx.left).equals(this.visit(ctx.right));
        }
        else if (ctx.op.LE() != null) {
          return asDouble(ctx.left) <= asDouble(ctx.right);
        }
        else if (ctx.op.GE() != null) {
          return asDouble(ctx.left) >= asDouble(ctx.right);
        }
        else if (ctx.op.LT() != null) {
          return asDouble(ctx.left) < asDouble(ctx.right);
        }
        else if (ctx.op.GT() != null) {
          return asDouble(ctx.left) > asDouble(ctx.right);
        }
        throw new RuntimeException("not implemented: comparator operator " + ctx.op.getText());
      }
    
      @Override
      public Object visitBinaryExpression(SimpleBooleanParser.BinaryExpressionContext ctx) {
        if (ctx.op.AND() != null) {
          return asBoolean(ctx.left) && asBoolean(ctx.right);
        }
        else if (ctx.op.OR() != null) {
          return asBoolean(ctx.left) || asBoolean(ctx.right);
        }
        throw new RuntimeException("not implemented: binary operator " + ctx.op.getText());
      }
    
      @Override
      public Object visitBoolExpression(SimpleBooleanParser.BoolExpressionContext ctx) {
        return Boolean.valueOf(ctx.getText());
      }
    
      private boolean asBoolean(SimpleBooleanParser.ExpressionContext ctx) {
        return (boolean)visit(ctx);
      }
    
      private double asDouble(SimpleBooleanParser.ExpressionContext ctx) {
        return (double)visit(ctx);
      }
    }
    

    Running the Main class will result in the following output:

    1 > 2                                                                  -> false
    1 >= 1.0                                                               -> true
    TRUE = FALSE                                                           -> false
    FALSE = FALSE                                                          -> true
    A OR B                                                                 -> true
    B                                                                      -> false
    A = B                                                                  -> false
    c = C                                                                  -> true
    E > D                                                                  -> true
    B OR (c = B OR (A = A AND c = C AND E > D))                            -> true
    (A = a OR B = b OR C = c AND ((D = d AND E = e) OR (F = f AND G = g))) -> true