Search code examples
pythonstringparsinglogic

Placing brackets in logical formula where possible in Python


How can I place brackets where possible in a first order logical formula in Python? This formula only exists out of logical not ¬, and ∧ or ∨, implies → and bi implies ↔. For example "¬(A ∧ B) ∨ C" yields ((¬(A ∧ B)) ∨ C). You can see my attempt below. I want a function that wraps each operator in brackets (see tests) so only one operator is present within the brackets. Does anyone know why my code fails or any solution to this problem?

import re

def add_brackets(formula):
    # Define operator priorities
    priority = {'¬': 4, '∧': 3, '∨': 2, '→': 1, '↔': 0}

    # Convert formula to list of tokens
    tokens = re.findall(r'\(|\)|¬|∧|∨|→|↔|[A-Z]', formula)

    # Initialize stack to hold operators
    stack = []

    # Initialize queue to hold output
    output = []

    # Loop through tokens
    for token in tokens:
        if token.isalpha():
            output.append(token)
        elif token == '¬':
            stack.append(token)
        elif token in '∧∨→↔':
            while stack and stack[-1] != '(' and priority[token] <= priority[stack[-1]]:
                output.append(stack.pop())
            stack.append(token)
        elif token == '(':
            stack.append(token)
        elif token == ')':
            while stack and stack[-1] != '(':
                output.append(stack.pop())
            stack.pop()
    
    # Empty out remaining operators on the stack
    while len(stack) != 0:
        if stack[-1] == '(':
            raise ValueError('Unmatched left parenthesis')
        output.append(stack.pop())

    # Loop through output
    for i, token in enumerate(output):
        if token in '∧∨→↔':
            if len(output) < i + 3:
                raise ValueError('Invalid formula')
            result = '({} {} {})'.format(output[i+1], token, output[i+2])
            output[i:i+3] = [result]
    
    # Return final result
    return output[0]


# Formula 1
formula1 = "A ∧ B ∨ C"
assert add_brackets(formula1) == "(A ∧ (B ∨ C))"

# Formula 2
formula2 = "¬(A ∧ B) ∨ C"
assert add_brackets(formula2) == "((¬(A ∧ B)) ∨ C)"

# Formula 3
formula3 = "A ∧ B ∧ C ∧ D ∧ E"
assert add_brackets(formula3) == "((((A ∧ B) ∧ C) ∧ D) ∧ E))"

# Formula 4
formula4 = "¬A ∧ ¬B ∧ ¬C"
assert add_brackets(formula4) == "(((¬A) ∧ (¬B)) ∧ (¬C))"

# Formula 5
formula5 = "A ∧ ¬(B ∨ C)"
assert add_brackets(formula5) == "(A ∧ (¬(B ∨ C)))"

# Formula 6
formula6 = "A ∨ B → C ∧ D"
assert add_brackets(formula6) == "((A ∨ B) → (C ∧ D))"

# Formula 7
formula7 = "A ∧ B → C ∨ D"
assert add_brackets(formula7) == "(((A ∧ B) → (C ∨ D)))"

# Formula 8
formula8 = "¬(A ∧ B) → C ∨ D"
assert add_brackets(formula8) == "((¬(A ∧ B)) → (C ∨ D))"

# Formula 9
formula9 = "(A → B) → (C → D)"
assert add_brackets(formula9) == "((A → B) → (C → D))"

# Formula 10
formula10 = "(A ∧ B) ∨ (C ∧ D) → E"
assert add_brackets(formula10) == "(((A ∧ B) ∨ (C ∧ D)) → E)"

Solution

  • Pyparsing is a Python parser combinator library for building parsers using its own embedded DSL syntax. Think of it as a verbose sort of regular expression, but with more capabilities for recursive grammar, named results, and parse-time callbacks.

    Pyparsing also includes helper or macro methods for common constructs, like delimited lists, quoted strings, and other patterns such as IP addresses, date/time timestamps, etc. One such method is one for defining infix notation, in which you provide an expression for the base operand (in your case, a word of one or more alpha letters) and a list of tuples that define the operations in precedence order. Each tuple contains the symbol, its number of operands, and right- or left-associativity.

    Here is the whole pyparsing parser to parse your grammar:

    import pyparsing as pp
    
    var = pp.Word(pp.alphas)
    expr = pp.infix_notation(
        var,
        [
            # '¬': 4, '∧': 3, '∨': 2, '→': 1, '↔': 0
            ('¬', 1, pp.opAssoc.RIGHT),
            ('∧', 2, pp.opAssoc.LEFT),
            ('∨', 2, pp.opAssoc.LEFT),
            ('→', 2, pp.opAssoc.LEFT),
            ('↔', 2, pp.opAssoc.LEFT),
        ]
    )
    

    Pyparsing expressions have a run_tests method to feed a list of test strings, and run_tests will parse each and list out the parsed results as a list.

    Here are your test strings:

    test_strings = (
        """
        # Formula 1
        A ∧ B ∨ C
    
        # Formula 2
        ¬(A ∧ B) ∨ C
    
        # Formula 3
        A ∧ B ∧ C ∧ D ∧ E
    
        # Formula 4
        ¬A ∧ ¬B ∧ ¬C
    
        # Formula 5
        A ∧ ¬(B ∨ C)
    
        # Formula 6
        A ∨ B → C ∧ D
    
        # Formula 7
        A ∧ B → C ∨ D
    
        # Formula 8
        ¬(A ∧ B) → C ∨ D
    
        # Formula 9
        (A → B) → (C → D)
    
        # Formula 10
        (A ∧ B) ∨ (C ∧ D) → E
        """
    )
    
    expr.run_tests(test_strings, full_dump=False)
    

    Calling run_tests gives these results:

    # Formula 1
    A ∧ B ∨ C
    [[['A', '∧', 'B'], '∨', 'C']]
    
    # Formula 2
    ¬(A ∧ B) ∨ C
    [[['¬', ['A', '∧', 'B']], '∨', 'C']]
    
    # Formula 3
    A ∧ B ∧ C ∧ D ∧ E
    [['A', '∧', 'B', '∧', 'C', '∧', 'D', '∧', 'E']]
    
    # Formula 4
    ¬A ∧ ¬B ∧ ¬C
    [[['¬', 'A'], '∧', ['¬', 'B'], '∧', ['¬', 'C']]]
    
    # Formula 5
    A ∧ ¬(B ∨ C)
    [['A', '∧', ['¬', ['B', '∨', 'C']]]]
    
    # Formula 6
    A ∨ B → C ∧ D
    [[['A', '∨', 'B'], '→', ['C', '∧', 'D']]]
    
    # Formula 7
    A ∧ B → C ∨ D
    [[['A', '∧', 'B'], '→', ['C', '∨', 'D']]]
    
    # Formula 8
    ¬(A ∧ B) → C ∨ D
    [[['¬', ['A', '∧', 'B']], '→', ['C', '∨', 'D']]]
    
    # Formula 9
    (A → B) → (C → D)
    [[['A', '→', 'B'], '→', ['C', '→', 'D']]]
    
    # Formula 10
    (A ∧ B) ∨ (C ∧ D) → E
    [[[['A', '∧', 'B'], '∨', ['C', '∧', 'D']], '→', 'E']]
    
    

    That's pretty close to your desired output, though these are parsed nested lists, and we really want your output strings with the parentheses. You'll also notice that pyparsing does not emit all binary output, but for repeated operands with the same operator, you get just one list (like this one for Formula 3 A ∧ B ∧ C ∧ D ∧ E -> [['A', '∧', 'B', '∧', 'C', '∧', 'D', '∧', 'E']]). We could write a recursive regrouper for these lists (which I encourage you to try writing for yourself), or we could regroup each parsed group at parse time using a callback which in pyparsing we call a parse action. The tuples for infix_notation can take a fourth element which used as a parse action. Here is the modified grammar, with parse actions for unary and binary operators:

    def unary_regroup(tokens):
        t = tokens[0]
        return [pp.ParseResults([t[0], [t[1:]]])]
    
    def binary_regroup(tokens):
        t = tokens[0]
        # get the operator for this expression
        op = t[1]
        # get all the operands - [::2] gets just the operands, not the 
        # intervening operators (which we already have)
        opers = t[::2]
    
        # create a group for the first two operands
        ret = pp.ParseResults([opers[0], op, opers[1]])
        for operand in opers[2:]:
            # for each subsequent operand, create a new group
            ret = pp.ParseResults([ret, op, operand])
    
        return [ret]
    
    var = pp.Word(pp.alphas)
    expr = pp.infix_notation(
        var,
        [
            # '¬': 4, '∧': 3, '∨': 2, '→': 1, '↔': 0
            ('¬', 1, pp.opAssoc.RIGHT, unary_regroup),
            ('∧', 2, pp.opAssoc.LEFT, binary_regroup),
            ('∨', 2, pp.opAssoc.LEFT, binary_regroup),
            ('→', 2, pp.opAssoc.LEFT, binary_regroup),
            ('↔', 2, pp.opAssoc.LEFT, binary_regroup),
        ]
    )
    

    (I apologize for the complexity of these two functions, pyparsing is somewhat persnickety when parse actions need to return token lists that are not simple structures.)

    If we call run_tests now for Formula 3, you can see that the previous all-in-one group has been broken down in to nested binary groups:

    # Formula 3
    A ∧ B ∧ C ∧ D ∧ E
    [[[[['A', '∧', 'B'], '∧', 'C'], '∧', 'D'], '∧', 'E']]
    

    Now we can write our recursive method to convert this into a string with nested parentheses:

    for test in test_strings.splitlines():
        test = test.strip()
        if not test or test.startswith("#"):
            print(test)
            continue
        parsed = expr.parse_string(test)
    
        def enclose_groups_in_parens(t):
            ret = []
            for ob in t:
                if isinstance(ob, (pp.ParseResults, list)):
                    ret.append(enclose_groups_in_parens(ob))
                else:
                    ret.append(ob)
            return f"({' '.join(ret)})" if len(ret) > 1 else ' '.join(ret)
    
        regrouped_string = enclose_groups_in_parens(parsed)
        print(test, '->', regrouped_string)
    

    Which prints:

    # Formula 1
    A ∧ B ∨ C -> ((A ∧ B) ∨ C)
    
    # Formula 2
    ¬(A ∧ B) ∨ C -> ((¬ (A ∧ B)) ∨ C)
    
    # Formula 3
    A ∧ B ∧ C ∧ D ∧ E -> ((((A ∧ B) ∧ C) ∧ D) ∧ E)
    
    # Formula 4
    ¬A ∧ ¬B ∧ ¬C -> (((¬ A) ∧ (¬ B)) ∧ (¬ C))
    
    # Formula 5
    A ∧ ¬(B ∨ C) -> (A ∧ (¬ (B ∨ C)))
    
    # Formula 6
    A ∨ B → C ∧ D -> ((A ∨ B) → (C ∧ D))
    
    # Formula 7
    A ∧ B → C ∨ D -> ((A ∧ B) → (C ∨ D))
    
    # Formula 8
    ¬(A ∧ B) → C ∨ D -> ((¬ (A ∧ B)) → (C ∨ D))
    
    # Formula 9
    (A → B) → (C → D) -> ((A → B) → (C → D))
    
    # Formula 10
    (A ∧ B) ∨ (C ∧ D) → E -> (((A ∧ B) ∨ (C ∧ D)) → E)