I have a list of Boolean expressions that represent physical objects that can be combined to represent larger objects. They look something like this: ((A and B) or C). This object can be represented by a combination of A and B or by C alone. I would like to generate a list of lists of strings that can be used to create the object. In this case I want [[A,B], [C]].
Pyparsing looks pretty intriguing so I've decided to give it a shot for this problem. After a few failed attempts I've settled on adapting the fourFn.py example from the website. This is what I have so far:
from pyparsing import Literal, CaselessLiteral, Word, Combine, \
Group, Optional, ZeroOrMore, Forward, alphanums
# Tokens recorded by the pushFirst parse action while parsing; the test driver
# resets this list before each parse and evaluates a copy of it afterwards.
exprStack = []
def myAnd(op1, op2):
    """Combine the two operands of an ``and`` into one list of parts.

    Args:
        op1: Left operand; a single name (``str``) or a list produced by an
            earlier combination.
        op2: Right operand, placed after ``op1``.

    Returns:
        ``[op1, op2]`` when ``op1`` is a string; otherwise a new list holding
        the items of ``op1`` followed by ``op2``. ``op1`` is left unmodified.
    """
    if isinstance(op1, str):
        return [op1, op2]
    # list.append() returns None, so returning its result loses the list.
    # Build the extended list explicitly instead.
    return list(op1) + [op2]
def myOr(op1, op2):
    """Combine the two operands of an ``or`` into a list of alternatives.

    Args:
        op1: Left operand; a single name (``str``) or a list produced by an
            earlier combination.
        op2: Right operand, added as its own one-item alternative ``[op2]``.

    Returns:
        ``[[op1], [op2]]`` when ``op1`` is a string; otherwise a new list
        holding the items of ``op1`` followed by ``[op2]``. ``op1`` is left
        unmodified.
    """
    if isinstance(op1, str):
        return [[op1], [op2]]
    # list.append() returns None, so returning its result loses the list.
    # Build the extended list explicitly instead.
    return list(op1) + [[op2]]
def pushFirst(strg, loc, toks):
    """Parse action: record the first matched token on ``exprStack``.

    ``strg`` and ``loc`` are accepted but not used.
    """
    first_token = toks[0]
    exprStack.append(first_token)
# Grammar cache: built on the first BNF() call and reused afterwards.
bnf = None


def BNF():
    """Return the grammar for boolean expressions, building it on first use.

    boolop :: 'and' | 'or'
    gene :: alphanum
    atom :: gene | '(' expr ')'
    expr :: atom [ boolop expr ]*

    Parse actions push tokens onto ``exprStack`` via ``pushFirst``: each bare
    element pushes itself, and each ``boolop expr`` pair pushes its operator
    once the right-hand expression has been matched.
    """
    global bnf
    if bnf is None:
        element = Word(alphanums)
        andop = Literal("and")
        orop = Literal("or")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        boolop = andop | orop

        expr = Forward()
        # Only a bare element pushes itself. A parenthesised group must not
        # carry its own parse action: the nested expr has already pushed its
        # tokens, and pushing toks[0] again duplicates the group's first
        # element on the stack.
        atom = element.setParseAction(pushFirst) | (lpar + expr + rpar)
        expr << atom + ZeroOrMore((boolop + expr).setParseAction(pushFirst))
        bnf = expr
    return bnf
# Dispatch table: operator keyword -> function combining two operands.
fn = {
    "or": myOr,
    "and": myAnd,
}


def evaluateStack(s):
    """Pop and evaluate the expression on top of the stack ``s``.

    A token that is not an operator key of ``fn`` is returned as is. For an
    operator, the right operand is evaluated first (it sits on top of the
    stack), then the left one, and the operator's function is applied to
    ``(left, right)``. ``s`` is consumed in the process.
    """
    token = s.pop()
    if token not in fn:
        return token
    right = evaluateStack(s)
    left = evaluateStack(s)
    combine = fn[token]
    return combine(left, right)
if __name__ == "__main__":
    def test(s, expVal):
        """Parse ``s``, evaluate the resulting stack and report the outcome.

        Prints one line per case; a mismatch against ``expVal`` is prefixed
        with ``!!! ``.
        """
        global exprStack
        exprStack = []
        results = BNF().parseString(s)
        # Evaluate a copy so the complete stack can still be printed below.
        val = evaluateStack(exprStack[:])
        if val == expVal:
            fields = [s, "=", val, results, "=>", exprStack]
        else:
            fields = ["!!! " + s, val, "!=", expVal, results, "=>", exprStack]
        # A single-argument print(...) produces the same space-separated line
        # under both Python 2 and Python 3, unlike the print statement.
        print(" ".join(str(field) for field in fields))

    test("((A and B) or C)", [['A', 'B'], ['C']])
    test("(A and B) or C", [['A', 'B'], ['C']])
    test("(A or B) and C", [['A', 'C'], ['B', 'C']])
    test("A and B", ['A', 'B'])
    test("A or B", [['A'], ['B']])
The first three tests fail here and only return the first element of each expression in parentheses. A will get pushed to the stack multiple times. It seems that the way I modified fourFn.py has broken my script's ability to handle these groups. Is there a better way to approach this problem?
Edit: After a cup of coffee, I realized the problems I was having were pretty easy to solve. My new `and` and `or` functions are as follows:
def myAnd(op1, op2):
    """Combine the two operands of an ``and`` into one flat list of parts.

    Args:
        op1: Left operand; a single name (``str``) or a list of parts.
        op2: Right operand; a single name (``str``) or a list of parts.

    Returns:
        A new list with the parts of ``op1`` followed by the parts of
        ``op2``. A string operand counts as a single part. Neither operand
        is modified.
    """
    # Normalising both sides first keeps every str/list combination
    # symmetric and avoids collecting the None results of list.append().
    left = [op1] if isinstance(op1, str) else list(op1)
    right = [op2] if isinstance(op2, str) else list(op2)
    return left + right
def myOr(op1, op2):
    """Combine the two operands of an ``or`` into a pair of alternatives.

    Args:
        op1: Left operand; a single name (``str``) or a list of parts.
        op2: Right operand; a single name (``str``) or a list of parts.

    Returns:
        A two-item list ``[first, second]``. A string operand is wrapped as
        a one-item list; a list operand is used as is, without copying or
        flattening.
    """
    # Wrap bare names only; wrapping an operand that is already a list
    # would add an extra nesting level to that alternative.
    first = [op1] if isinstance(op1, str) else op1
    second = [op2] if isinstance(op2, str) else op2
    return [first, second]
And the parser is constructed as follows:
expr = Forward()
# A bare element pushes itself; a parenthesised group has no parse action of
# its own.
operand = element.setParseAction(pushFirst)
group = lpar + expr + rpar
atom = operand | group
# The operator is pushed once the whole "boolop expr" tail has matched.
tail = (boolop + expr).setParseAction(pushFirst)
expr << atom + ZeroOrMore(tail)
A new and more interesting question is how to deal with a case like this: (A or B) and C. The result should be [[A, C], [B, C]]. Is there a typical pyparsing way of dealing with this issue?
For future reference, here's an approach that works for my test cases but that deviates from the AST approach suggested above:
from pyparsing import Literal, Word, Optional, \
Group, ZeroOrMore, Forward, alphanums
import ffparser, sys
# Stack that the pushFirst parse action appends matched tokens to.
exprStack = []
def myAnd(op1, op2):
    """Return the ``and`` of two operands as a list of alternatives.

    Each operand is either a single name (``str``) or a list of
    alternatives, where every alternative is itself a list of names that
    are all required together.

    Args:
        op1: Left operand.
        op2: Right operand.

    Returns:
        A new list with one alternative per pairing of an ``op1``
        alternative with an ``op2`` alternative, each holding the left
        names followed by the right names. Neither operand is modified.
    """
    # A bare name is treated as one alternative containing just that name.
    left = [[op1]] if isinstance(op1, str) else op1
    right = [[op2]] if isinstance(op2, str) else op2
    # "and" distributes over the alternatives on both sides.
    return [list(a) + list(b) for a in left for b in right]
def myOr(op1, op2):
    """Return the ``or`` of two operands as a list of alternatives.

    Each operand is either a single name (``str``) or a list of
    alternatives, where every alternative is itself a list of names.

    Args:
        op1: Left operand.
        op2: Right operand.

    Returns:
        A new list with the alternatives of ``op1`` followed by those of
        ``op2``. A bare name contributes the one-item alternative
        ``[name]``. The operand lists themselves are not modified.
    """
    left = [[op1]] if isinstance(op1, str) else list(op1)
    right = [[op2]] if isinstance(op2, str) else list(op2)
    return left + right
def pushFirst(strg, loc, toks):
    """Parse action: record the first matched token on ``exprStack``.

    ``strg`` and ``loc`` are accepted but not used.
    """
    first_token = toks[0]
    exprStack.append(first_token)
# Module-level grammar slot; BNF() declares it global and checks it before
# building the grammar.
bnf = None
def BNF():
"""
boolop :: 'and' | 'or'
gene :: alphanum
atom :: gene | '(' expr ')'
"""
global bnf
if not bnf:
element = Word(alphanums)
andop = Literal( "and" )
orop = Literal( "or" )
lpar = Literal( "(" ).suppress()
rpar = Literal( ")" ).suppress()
boolop = andop | orop
expr = Forward()
atom = element.setParseAction(pushFirst) | (Optional(lpar) + expr + Optional(rpar))
expr << atom + ZeroOrMore((boolop + expr).setParseAction(pushFirst))