I’ve got a pyparsing issue that I have spent days trying to fix, with no luck.
Here’s the relevant pseudocode:
class Parser(object):
def __init__(self):
self.multilineCommands = []
self.grammar = <pyparsing grammar> # depends on self.multilineCommands
So, I’m trying to get a specific set of doctests to pass. But the tests in question update self.multilineCommands
after instantiation. Although there are no issues setting the attribute correctly, self.grammar
seems blind to the change, and fails the tests.
However, if I set self.multilineCommands
inside __init__()
, then the tests all pass.
How can I get self.grammar
to stay up-to-date with self.multilineCommands
?
So, part of the issue here is that I’m refactoring code I didn’t write. My experience with pyparsing is also exclusively limited to my work on this project.
Pyparsing author Paul McGuire posted a helpful response, but I couldn’t get it to work. It could be an error on my part, but more likely the bigger issue is that I over-simplified the pseudo-code written above.
So, I’m going to post the actual code.
What you are about to see is uncensored. The sight of it might make you cringe…or maybe even cry. In the original module, this code was just a single piece of a total “god class”. Splitting out what is below into the Parser
class is just step 1 (and apparently, step 1 was enough to break the tests).
class Parser(object):
'''Container object pyparsing-related parsing.
'''
def __init__(self, *args, **kwargs):
    r'''Build settings, grammars and parsers, then assemble `self.mainParser`.

    `*args` and `**kwargs` are accepted but not used by this method.

    >>> c = Cmd()
    >>> c.multilineCommands = ['multiline']
    >>> c.multilineCommands
    ['multiline']
    >>> c.parser.multilineCommands
    ['multiline']
    >>> c.case_insensitive = True
    >>> c.case_insensitive
    True
    >>> c.parser.case_insensitive
    True
    >>> print (c.parser('').dump())
    []
    >>> print (c.parser('/* empty command */').dump())
    []
    >>> print (c.parser('plainword').dump())
    ['plainword', '']
    - command: plainword
    - statement: ['plainword', '']
    - command: plainword
    >>> print (c.parser('termbare;').dump())
    ['termbare', '', ';', '']
    - command: termbare
    - statement: ['termbare', '', ';']
    - command: termbare
    - terminator: ;
    - terminator: ;
    >>> print (c.parser('termbare; suffx').dump())
    ['termbare', '', ';', 'suffx']
    - command: termbare
    - statement: ['termbare', '', ';']
    - command: termbare
    - terminator: ;
    - suffix: suffx
    - terminator: ;
    >>> print (c.parser('barecommand').dump())
    ['barecommand', '']
    - command: barecommand
    - statement: ['barecommand', '']
    - command: barecommand
    >>> print (c.parser('COMmand with args').dump())
    ['command', 'with args']
    - args: with args
    - command: command
    - statement: ['command', 'with args']
    - args: with args
    - command: command
    >>> print (c.parser('command with args and terminator; and suffix').dump())
    ['command', 'with args and terminator', ';', 'and suffix']
    - args: with args and terminator
    - command: command
    - statement: ['command', 'with args and terminator', ';']
    - args: with args and terminator
    - command: command
    - terminator: ;
    - suffix: and suffix
    - terminator: ;
    >>> print (c.parser('simple | piped').dump())
    ['simple', '', '|', ' piped']
    - command: simple
    - pipeTo: piped
    - statement: ['simple', '']
    - command: simple
    >>> print (c.parser('double-pipe || is not a pipe').dump())
    ['double', '-pipe || is not a pipe']
    - args: -pipe || is not a pipe
    - command: double
    - statement: ['double', '-pipe || is not a pipe']
    - args: -pipe || is not a pipe
    - command: double
    >>> print (c.parser('command with args, terminator;sufx | piped').dump())
    ['command', 'with args, terminator', ';', 'sufx', '|', ' piped']
    - args: with args, terminator
    - command: command
    - pipeTo: piped
    - statement: ['command', 'with args, terminator', ';']
    - args: with args, terminator
    - command: command
    - terminator: ;
    - suffix: sufx
    - terminator: ;
    >>> print (c.parser('output into > afile.txt').dump())
    ['output', 'into', '>', 'afile.txt']
    - args: into
    - command: output
    - output: >
    - outputTo: afile.txt
    - statement: ['output', 'into']
    - args: into
    - command: output
    >>> print (c.parser('output into;sufx | pipethrume plz > afile.txt').dump())
    ['output', 'into', ';', 'sufx', '|', ' pipethrume plz', '>', 'afile.txt']
    - args: into
    - command: output
    - output: >
    - outputTo: afile.txt
    - pipeTo: pipethrume plz
    - statement: ['output', 'into', ';']
    - args: into
    - command: output
    - terminator: ;
    - suffix: sufx
    - terminator: ;
    >>> print (c.parser('output to paste buffer >> ').dump())
    ['output', 'to paste buffer', '>>', '']
    - args: to paste buffer
    - command: output
    - output: >>
    - statement: ['output', 'to paste buffer']
    - args: to paste buffer
    - command: output
    >>> print (c.parser('ignore the /* commented | > */ stuff;').dump())
    ['ignore', 'the /* commented | > */ stuff', ';', '']
    - args: the /* commented | > */ stuff
    - command: ignore
    - statement: ['ignore', 'the /* commented | > */ stuff', ';']
    - args: the /* commented | > */ stuff
    - command: ignore
    - terminator: ;
    - terminator: ;
    >>> print (c.parser('has > inside;').dump())
    ['has', '> inside', ';', '']
    - args: > inside
    - command: has
    - statement: ['has', '> inside', ';']
    - args: > inside
    - command: has
    - terminator: ;
    - terminator: ;
    >>> print (c.parser('multiline has > inside an unfinished command').dump())
    ['multiline', ' has > inside an unfinished command']
    - multilineCommand: multiline
    >>> print (c.parser('multiline has > inside;').dump())
    ['multiline', 'has > inside', ';', '']
    - args: has > inside
    - multilineCommand: multiline
    - statement: ['multiline', 'has > inside', ';']
    - args: has > inside
    - multilineCommand: multiline
    - terminator: ;
    - terminator: ;
    >>> print (c.parser('multiline command /* with comment in progress;').dump())
    ['multiline', ' command /* with comment in progress;']
    - multilineCommand: multiline
    >>> print (c.parser('multiline command /* with comment complete */ is done;').dump())
    ['multiline', 'command /* with comment complete */ is done', ';', '']
    - args: command /* with comment complete */ is done
    - multilineCommand: multiline
    - statement: ['multiline', 'command /* with comment complete */ is done', ';']
    - args: command /* with comment complete */ is done
    - multilineCommand: multiline
    - terminator: ;
    - terminator: ;
    >>> print (c.parser('multiline command ends\n\n').dump())
    ['multiline', 'command ends', '\n', '\n']
    - args: command ends
    - multilineCommand: multiline
    - statement: ['multiline', 'command ends', '\n', '\n']
    - args: command ends
    - multilineCommand: multiline
    - terminator: ['\n', '\n']
    - terminator: ['\n', '\n']
    >>> print (c.parser('multiline command "with term; ends" now\n\n').dump())
    ['multiline', 'command "with term; ends" now', '\n', '\n']
    - args: command "with term; ends" now
    - multilineCommand: multiline
    - statement: ['multiline', 'command "with term; ends" now', '\n', '\n']
    - args: command "with term; ends" now
    - multilineCommand: multiline
    - terminator: ['\n', '\n']
    - terminator: ['\n', '\n']
    >>> print (c.parser('what if "quoted strings /* seem to " start comments?').dump())
    ['what', 'if "quoted strings /* seem to " start comments?']
    - args: if "quoted strings /* seem to " start comments?
    - command: what
    - statement: ['what', 'if "quoted strings /* seem to " start comments?']
    - args: if "quoted strings /* seem to " start comments?
    - command: what
    '''
    # SETTINGS
    # Must run first: the grammars below read the settings it installs
    # (legalChars, case_insensitive, multilineCommands, ...).
    self._init_settings()
    # GRAMMAR
    # Builds the basic expressions and creates self._grammars.
    self._init_grammars()
    # PARSERS
    # For easy reference to all contained parsers.
    # Hacky, I know. But I'm trying to fix code
    # elsewhere at the moment... :P)
    # The set holds parser *names* (strings); each _init_*Parser adds its own.
    self._parsers = set()
    self._init_prefixParser()
    self._init_terminatorParser()
    self._init_saveParser()
    self._init_inputParser()
    self._init_outputParser()
    # intermission! :D
    # (update grammar(s) containing parsers)
    # afterElements needs self.outputParser, so it can only be built once
    # _init_outputParser() has run; the multiline and single-line parsers
    # below in turn need afterElements.
    self.afterElements = \
        pyparsing.Optional(self.pipe + pyparsing.SkipTo(self.outputParser ^ self.stringEnd, ignore=self.doNotParse)('pipeTo')) + \
        pyparsing.Optional(self.outputParser('output') + pyparsing.SkipTo(self.stringEnd, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('outputTo'))
    self._grammars.add('afterElements')
    # end intermission
    self._init_blankLineTerminationParser()
    self._init_multilineParser()
    self._init_singleLineParser()
    self._init_optionParser()
    # Put it all together:
    # After the prefix, alternatives are tried in the order listed; the last
    # one accepts a multiline command followed by everything up to stringEnd.
    self.mainParser = \
        ( self.prefixParser +
            ( self.stringEnd |
              self.multilineParser |
              self.singleLineParser |
              self.blankLineTerminationParser |
              self.multilineCommand + pyparsing.SkipTo(
                  self.stringEnd,
                  ignore=self.doNotParse)
            )
        )
    # Comments are skipped everywhere inside mainParser.
    self.mainParser.ignore(self.commentGrammars)
    #self.mainParser.setDebug(True)
    # And we've got mainParser.
#
# SPECIAL METHODS
#
def __call__(self, *args, **kwargs):
'''Call an instance for convenient parsing. Example:
p = Parser()
result = p('some stuff for p to parse')
This just calls `self.parseString()`, so it's safe to
override should you choose.
'''
return self.parseString(*args, **kwargs)
def __getattr__(self, attr):
# REMEMBER: This is only called when normal attribute lookup fails
raise AttributeError('Could not find {0!r} in class Parser'.format(attr))
@property
def multilineCommands(self):
return self._multilineCommands
@multilineCommands.setter
def multilineCommands(self, value):
value = list(value) if not isinstance(value, list) else value
self._multilineCommands = value
@multilineCommands.deleter
def multilineCommands(self):
del self._multilineCommands
self._multilineCommands = []
#
# PSEUDO_PRIVATE METHODS
#
def _init_settings(self, *args, **kwargs):
    '''Install the default parser settings as instance attributes.

    `*args` and `**kwargs` are accepted but ignored.
    '''
    # Command recognition
    self.abbrev = True  # recognize abbreviated commands
    self.case_insensitive = True
    self.blankLinesAllowed = False
    self._multilineCommands = []
    self.noSpecialParse = {'ed', 'edit', 'exit', 'set'}

    # Character classes
    self.identchars = cmd.IDENTCHARS
    self.legalChars = u'!#$%.:?@_' + pyparsing.alphanums + pyparsing.alphas8bit

    # Syntax markers
    self.redirector = '>'  # for sending output to file
    self.terminators = [';']
    self.shortcuts = {
        '?': 'help',
        '!': 'shell',
        '@': 'load',
        '@@': '_relative_load',
    }

    # Reserved words; `keywords` starts out as a separate copy of them
    self.reserved_words = []
    self.keywords = list(self.reserved_words)
def _init_grammars(self, *args, **kwargs):
    '''Build the basic pyparsing expressions and record their names.

    Reads `self.legalChars`, `self.case_insensitive`,
    `self.multilineCommands` and `self.blankLinesAllowed`, so the settings
    must already be in place. Creates `self._grammars`, a set of attribute
    names. `*args` and `**kwargs` are accepted but not used.
    '''
    # Basic grammars
    self.commentGrammars = (pyparsing.pythonStyleComment|pyparsing.cStyleComment).ignore(pyparsing.quotedString).suppress()
    # An opening '/*' followed by everything up to '*/' or the end of input.
    self.commentInProgress = '/*' + pyparsing.SkipTo( pyparsing.stringEnd ^ '*/' )
    # Passed as `ignore=` to the SkipTo expressions built by the parsers.
    self.doNotParse = self.commentGrammars | self.commentInProgress | pyparsing.quotedString
    self.fileName = pyparsing.Word(self.legalChars + '/\\')
    self.inputFrom = self.fileName('inputFrom')
    self.inputMark = pyparsing.Literal('<')
    self.pipe = pyparsing.Keyword('|', identChars='|')
    # End of input, or a literal '\nEOF'.
    self.stringEnd = pyparsing.stringEnd ^ '\nEOF'
    # Complex grammars
    # NOTE: self.multilineCommands is iterated once, right here. The Or holds
    # the Keyword objects created from the list's contents at this moment, so
    # assigning a new list to the attribute afterwards does not change this
    # expression (or anything built from it) unless the grammar is rebuilt.
    self.multilineCommand = pyparsing.Or([pyparsing.Keyword(c, caseless=self.case_insensitive) for c in self.multilineCommands ])('multilineCommand')
    self.multilineCommand.setName('multilineCommand')
    # A word of legal characters that is not one of the multiline commands.
    self.oneLineCommand = ( ~self.multilineCommand + pyparsing.Word(self.legalChars))('command')
    # Hack-y convenience access to grammars
    # NOTE(review): 'noSpecialParse' and 'reserved_words' are assigned in
    # _init_settings as a plain set/list, not as pyparsing expressions --
    # confirm they belong in this registry.
    self._grammars = {
    # Basic grammars
    'commentGrammars',
    'commentInProgress',
    'doNotParse',
    'fileName',
    'inputFrom',
    'inputMark',
    'noSpecialParse',
    'pipe',
    'reserved_words',
    'stringEnd',
    # Complex grammars
    'multilineCommand',
    'oneLineCommand'
    }
    # replace_with_file_contents is defined outside this listing; presumably
    # it swaps the matched file name for that file's contents -- TODO confirm.
    self.inputFrom.setParseAction(replace_with_file_contents)
    # The '<' marker and any comments are replaced by an empty string.
    self.inputMark.setParseAction(lambda x: '')
    self.commentGrammars.addParseAction(lambda x: '')
    # blankLineTerminator only exists when blank lines are NOT allowed;
    # _init_blankLineTerminationParser reads it under the same condition.
    if not self.blankLinesAllowed:
        self.blankLineTerminator = (pyparsing.lineEnd * 2)('terminator')
    # Lower-case the matched command so results are case-normalized.
    if self.case_insensitive:
        self.multilineCommand.setParseAction(lambda x: x[0].lower())
        self.oneLineCommand.setParseAction(lambda x: x[0].lower())
def _init_all_parsers(self):
    '''Create every parser, plus the `afterElements` grammar they share.

    Expects `self._parsers` and `self._grammars` to exist already, along
    with the expressions built by `_init_grammars()`. The order matters:
    `afterElements` needs `self.outputParser`, and the multiline and
    single-line parsers need `afterElements`.
    '''
    self._init_prefixParser()
    self._init_terminatorParser()
    self._init_saveParser()
    self._init_inputParser()
    self._init_outputParser()
    # intermission! :D
    # (update grammar(s) containing parsers)
    self.afterElements = \
        pyparsing.Optional(self.pipe + pyparsing.SkipTo(self.outputParser ^ self.stringEnd, ignore=self.doNotParse)('pipeTo')) + \
        pyparsing.Optional(self.outputParser('output') + pyparsing.SkipTo(self.stringEnd, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('outputTo'))
    # Name the expression itself. `self._grammars` is a plain set of names
    # and has no setName(), so calling it there raised AttributeError.
    self.afterElements.setName('afterElements')
    self._grammars.add('afterElements')
    # end intermission
    # FIXME:
    # For some reason it's necessary to set this again.
    # (Otherwise pyparsing results include `outputTo`, but not `output`.)
    # NOTE(review): calling an expression with a results name returns a named
    # copy; the copy is discarded here, so confirm this line has any effect.
    self.outputParser('output')
    self._init_blankLineTerminationParser()
    self._init_multilineParser()
    self._init_singleLineParser()
    self._init_optionParser()
def _init_prefixParser(self):
    '''Create the prefix parser (an `Empty`) and register its name.'''
    prefix = pyparsing.Empty()
    prefix.setName('prefixParser')
    self.prefixParser = prefix
    self._parsers.add('prefixParser')
def _init_terminatorParser(self):
    '''Build `self.terminatorParser` from `self.terminators`.

    Each terminator may be either a ready-made pyparsing expression
    (anything with a `parseString` attribute) or a plain string, which is
    wrapped in a `Literal`. The alternatives are combined with `Or` and
    the result is named 'terminator'.
    '''
    # A real conditional expression instead of `cond and t or Literal(t)`:
    # the and/or form would hand an expression to Literal() if that
    # expression ever evaluated as false.
    alternatives = [
        t if hasattr(t, 'parseString') else pyparsing.Literal(t)
        for t in self.terminators
    ]
    self.terminatorParser = pyparsing.Or(alternatives)('terminator')
    self.terminatorParser.setName('terminatorParser')
    self._parsers.add('terminatorParser')
def _init_saveParser(self):
    '''Create the parser for save-style arguments and register its name.

    The grammar is an optional index (digits or '*', named 'idx'), an
    optional file name (named 'fname'), then end of string.
    '''
    self.saveparser = (pyparsing.Optional(pyparsing.Word(pyparsing.nums)|'*')('idx') +
                       pyparsing.Optional(pyparsing.Word(self.legalChars + '/\\'))('fname') +
                       pyparsing.stringEnd)
    self.saveparser.setName('saveParser')
    # Every other entry in self._parsers matches an attribute name exactly.
    # Expose the parser under the registered spelling too, keeping the
    # lower-case `saveparser` attribute for existing callers.
    self.saveParser = self.saveparser
    self._parsers.add('saveParser')
def _init_outputParser(self):
    '''Create the output-redirection parser and register its name.

    Matches, in order: the doubled redirector, the redirector at the start
    of a word, or the redirector preceded by any character other than '='.
    The whole alternation carries the results name 'output'.
    '''
    # outputParser = (pyparsing.Literal('>>') | (pyparsing.WordStart() + '>') | pyparsing.Regex('[^=]>'))('output')
    # The alternation is parenthesized so that ('output') names all three
    # alternatives. A call binds tighter than `|`, so without the parentheses
    # only the Regex branch received the results name.
    self.outputParser = (
        pyparsing.Literal(self.redirector * 2)
        | (pyparsing.WordStart() + self.redirector)
        | pyparsing.Regex('[^=]' + self.redirector)
    )('output')
    self.outputParser.setName('outputParser')
    self._parsers.add('outputParser')
def _init_inputParser(self):
    '''Create the input-redirection parser and register its name.'''
    # a not-entirely-satisfactory way of telling "<" meaning "import from"
    # apart from "<" meaning "lesser than"
    maybe_source = pyparsing.Optional(self.inputFrom)
    maybe_redirect = pyparsing.Optional('>')
    maybe_target = pyparsing.Optional(self.fileName)
    end_or_pipe = (pyparsing.stringEnd | '|')
    parser = self.inputMark + maybe_source + maybe_redirect + maybe_target + end_or_pipe
    parser.ignore(self.commentInProgress)
    parser.setName('inputParser')
    self.inputParser = parser
    self._parsers.add('inputParser')
def _init_blankLineTerminationParser(self):
    '''Create the parser for statements ended by a blank line.

    When `self.blankLinesAllowed` is true the parser never matches.
    Otherwise it matches a command, its arguments (stripped, named
    'args') and `self.blankLineTerminator`. Either way the result is
    named 'statement' and its name is added to `self._parsers`.
    '''
    if self.blankLinesAllowed:
        # NoMatch must be instantiated: the bare class is not a parser
        # element and cannot take the setResultsName() call below.
        parser = pyparsing.NoMatch()
    else:
        parser = ((self.multilineCommand ^ self.oneLineCommand) + pyparsing.SkipTo(self.blankLineTerminator, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('args') + self.blankLineTerminator )
    # setResultsName() returns a copy rather than modifying the expression
    # in place, so the result has to be stored.
    self.blankLineTerminationParser = parser.setResultsName('statement')
    self.blankLineTerminationParser.setName('blankLineTerminationParser')
    self._parsers.add('blankLineTerminationParser')
def _init_multilineParser(self):
    '''Create the parser for terminated statements and register its name.

    Shape: command, args up to a terminator, the terminator (together
    named 'statement'), then a suffix and `self.afterElements`.
    '''
    #self.multilineParser = self.multilineParser.setResultsName('multilineParser')
    command = self.multilineCommand('multilineCommand') ^ self.oneLineCommand
    args = pyparsing.SkipTo(self.terminatorParser, ignore=self.doNotParse)
    args = args.setParseAction(lambda x: x[0].strip())('args')
    statement = (command + args + self.terminatorParser)('statement')

    # The suffix is whatever sits between the terminator and the first
    # output redirect, pipe or end of input.
    suffix_stop = self.outputParser ^ self.pipe ^ self.stringEnd
    suffix = pyparsing.SkipTo(suffix_stop, ignore=self.doNotParse)
    suffix = suffix.setParseAction(lambda x: x[0].strip())('suffix')

    self.multilineParser = (statement + suffix + self.afterElements)
    self.multilineParser.ignore(self.commentInProgress)
    self.multilineParser.setName('multilineParser')
    self._parsers.add('multilineParser')
def _init_singleLineParser(self):
    '''Create the parser for one-line statements and register its name.

    Shape: a one-line command plus its args (together named 'statement'),
    an optional terminator, then `self.afterElements`.
    '''
    #self.singleLineParser = self.singleLineParser.setResultsName('singleLineParser')
    # Args run up to the first terminator, end of input, pipe or redirect.
    args_stop = self.terminatorParser ^ self.stringEnd ^ self.pipe ^ self.outputParser
    args = pyparsing.SkipTo(args_stop, ignore=self.doNotParse)
    args = args.setParseAction(lambda x:x[0].strip())('args')
    statement = (self.oneLineCommand + args)('statement')
    self.singleLineParser = (statement +
                             pyparsing.Optional(self.terminatorParser) + self.afterElements)
    self.singleLineParser.setName('singleLineParser')
    self._parsers.add('singleLineParser')
def _init_optionParser(self):
# Different from the other parsers.
# This one is based on optparse.OptionParser,
# not pyparsing.
#
# It's included here to keep all parsing-related
# code under one roof.
# TODO: Why isn't this using cmd2's OptionParser?
self.optionParser = optparse.OptionParser()
self._parsers.add('optionParser')
def parseString(self, *args, **kwargs):
'''Parses a string using `self.mainParser`.'''
return self.mainParser.parseString(*args, **kwargs)
There you have it. The ugly truth. ☺
Edited 2012-11-12: I incorrectly used the term “class attribute” in the original title for this question. It’s a silly mistake, and I apologize for any confusion. It has now been corrected to “instance attribute”.
Define self.multilineCommands
as a Forward, like this:
self.multilineCommands = Forward()
and then define the rest of your grammar using self.multilineCommands
as you would normally. In your tests, “inject” different expressions for self.multilineCommands
using the <<
operator:
self.multilineCommands << (test expression 1)
Then when you parse using the overall grammar, your pyparsing test expression will be used wherever self.multilineCommands
is.
(Note:
Be sure to enclose the right-hand side in ()
’s to guard against precedence of operations problems due to my unfortunate choice of <<
for this operator. In the next release of pyparsing, I’ll add support for <<=
and deprecate <<
for this operation, which will resolve most of this problem.)
EDIT
Here is a flexible parser that has a write-only property that will accept a list of strings to take as allowed keywords. The parser itself is a simple function call parser that parses functions that take a single numeric argument, or the constants pi
or π
or e
.
# coding=UTF-8
from pyparsing import *
class FlexParser(object):
    """Parser made of a swappable keyword part followed by a fixed part.

    Assign a list of words to `keywords` (a write-only property) to change
    which keywords are accepted; the fixed part is left untouched.
    """

    def __init__(self, fixedPart):
        # The Forward is a placeholder; its contents are injected later
        # through the `keywords` property.
        placeholder = Forward()
        self._dynamicExpr = placeholder
        self.parser = placeholder + fixedPart

    def _set_keywords(self, kw_list):
        # Turn the words into Keyword expressions and inject their
        # MatchFirst into the placeholder.
        alternatives = [Keyword(word) for word in kw_list]
        self._dynamicExpr << (MatchFirst(alternatives))

    keywords = property(fset=_set_keywords)

    def parseString(self, s):
        """Parse `s` with the combined (dynamic + fixed) parser."""
        return self.parser.parseString(s)
E = CaselessKeyword("e").setParseAction(replaceWith(2.71828))
PI = (CaselessKeyword("pi") | "π").setParseAction(replaceWith(3.14159))
numericLiteral = PI | E | Regex(r'[+-]?\d+(\.\d*)?').setParseAction(lambda t:float(t[0]))
fp = FlexParser('(' + numericLiteral + ')')
fp.keywords = "sin cos tan asin acos atan sqrt".split()
print fp.parseString("sin(30)")
print fp.parseString("cos(π)")
print fp.parseString("sqrt(-1)")
Now change the keywords by just assigning a word list to the keywords
property. The setter method converts the list to a MatchFirst of Keywords. Note that now, parsing "sin(30)" will raise an exception:
fp.keywords = "foo bar baz boo".split()
print fp.parseString("foo(1000)")
print fp.parseString("baz(e)")
print fp.parseString("bar(1729)")
print fp.parseString("sin(30)") # raises a ParseException