Search code examples
pythonparsingply

Parsing logical expression using PLY


I am trying to parse logical expression containing 'implies' 'and' 'or' and 'negation' with parentheses such as (Q(a)=>R(b)) using PLY where Q(a) and R(b) are predicates and always start with block letters and variables 'a' and 'b' will be single small letter characters.

Each operator and its operands will be surrounded by parenthesis. Some other examples of logical expression i am trying to parse are ((D(q,w) & E(r,s)) => F(t)), (((G(a)=>H(b))=>I(c))=>J(d)), (~(~(~P(a)))).

Below is the code i have written

import ply.lex as lex
import ply.yacc as yacc

tokens = [
    'LPAREN',
    'RPAREN',
    'PREDICATE',
    'AND',
    'OR',
    'IMPLIES',
    'NEGATION'
]

t_PREDICATE = r'[A-Z][a-z]*\(([A-Za-z,]+)\)'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_AND = r'\&'
t_OR = r'\|'
t_IMPLIES = r'\=>'
t_NEGATION = r'\~'

t_ignore = r' '

def t_error(t):
    print("Illegeal characters")
    t.lexer.skip(1)

lexer = lex.lex()


def p_expression_negation(p):
    '''
    expression : LPAREN NEGATION PREDICATE RPAREN

    '''
    p[0] = (p[2], p[3])
    print(p[0])

def p_expression_single(p):
    '''
    expression : PREDICATE
    '''
    p[0] = p[1]
    print(p[0])

def p_expression(p):
    '''
    expression : LPAREN term IMPLIES term RPAREN
               | LPAREN term AND term RPAREN
               | LPAREN term OR term RPAREN
    '''

    p[0] = (p[3], p[2], p[4])
    print(p[0])

def p_term_negation(p):
    '''
    term : LPAREN NEGATION PREDICATE RPAREN

    '''
    p[0] = (p[2], p[3])
    return p[0]


def p_term_single(p):
    '''
    term : PREDICATE
    '''
    p[0] = (p[1])
    return p[0]

def p_term_multiple(p):
    '''
    term : LPAREN PREDICATE IMPLIES PREDICATE RPAREN
         | LPAREN PREDICATE AND PREDICATE RPAREN
         | LPAREN PREDICATE OR PREDICATE RPAREN

    '''
    p[0] = (p[3], p[2], p[4])
    return p[0]


def p_error(p):
    print("Syntax error in input!")

parser = yacc.yacc()

while True:
    try:
        s = input('Enter the input')
    except EOFError:
        break
    parser.parse(s, lexer=lexer)

But this program fails for logic expression such as (((G(a)=>H(b))=>I(c))=>J(d)), (~(~(~P(a)))) since these expressions start with more than 2 '(', i am not able to modify my code to accommodate such cases where opening parentheses at beginning of expression can range from 1 to n.

I think i should use recursion but that as well is failing for me.

This is my first program with PLY so not able to write a proper Grammar rule for yacc, it would be of great help if someone could help me with this


Solution

  • I don't remember where to use term but this works for me.

    EDIT:

    Previous version wasn't ideal because Q(a)=>R(b) was treated as correct expression.

    Current version treats Q(a)=>R(b) as incorrect expression.

    import ply.lex as lex
    import ply.yacc as yacc
    
    tokens = [
        'LPAREN',
        'RPAREN',
        'PREDICATE',
        'AND',
        'OR',
        'IMPLIES',
        'NEGATION'
    ]
    
    t_PREDICATE = r'[A-Z][a-z]*\(([A-Za-z,]+)\)'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_AND = r'\&'
    t_OR = r'\|'
    t_IMPLIES = r'\=>'
    t_NEGATION = r'\~'
    
    t_ignore = r' '
    
    def t_error(t):
        print("Illegeal characters")
        t.lexer.skip(1)
    
    lexer = lex.lex()
    
    
    def p_expression_normal(p):
        '''
        expression : LPAREN PREDICATE RPAREN
        '''
        p[0] = ('()', p[2])
        print(p[0])
    
    def p_expression_negation(p):
        '''
        expression : LPAREN NEGATION PREDICATE RPAREN
                   | LPAREN NEGATION expression RPAREN 
        '''
        p[0] = ('()', p[2], p[3])
        print(p[0])
    
    def p_expression_operation(p):
        '''
        expression : LPAREN expression IMPLIES expression RPAREN 
                   | LPAREN expression AND expression RPAREN 
                   | LPAREN expression OR expression RPAREN 
                   | LPAREN PREDICATE IMPLIES expression RPAREN 
                   | LPAREN PREDICATE AND expression RPAREN 
                   | LPAREN PREDICATE OR expression RPAREN 
                   | LPAREN expression IMPLIES PREDICATE RPAREN 
                   | LPAREN expression AND PREDICATE RPAREN 
                   | LPAREN expression OR PREDICATE RPAREN
                   | LPAREN PREDICATE IMPLIES PREDICATE RPAREN
                   | LPAREN PREDICATE AND PREDICATE RPAREN
                   | LPAREN PREDICATE OR PREDICATE RPAREN               
        '''
        p[0] = ('()', p[3], p[2], p[4])
        print(p[0])
    
    def p_error(p):
        print("Syntax error in input!")
    
    parser = yacc.yacc()
    
    
    #while True:
    #    try:
    #        s = input('Enter the input: ')
    #    except EOFError:
    #        break
    #    parser.parse(s, lexer=lexer)
    
    test = [
        '(Q(a))',  # OK
        'Q(a)',  # wrong
        '(Q(a)=>R(b))',  # OK
        'Q(a)=>R(b)',  # wrong
        '(((G(a)=>H(b))=>I(c))=>J(d))',  # OK
        '((G(a)=>H(b))=>I(c))=>J(d)',  # wrong
        '(~(~(~P(a))))',  # OK
        '~(~(~P(a)))',  # wrong
        '((D(q,w) & E(r,s)) => F(t))',  # OK
        '(D(q,w) & E(r,s)) => F(t)',  # wrong
    ]
    
    for s in test:
        print(s)
        print()
        parser.parse(s, lexer=lexer)
        print('\n------\n')