Search code examples
pythonlark-parser

how does one match EOL (newline) with lark?


I'm using the Lark parser with Python. I'd like to use EOL as part of the grammar, since the grammar is line-oriented. I'm getting an error when I try to add a regex for matching EOL. I've seen some examples like this:

CR : /\r/
LF : /\n/
NEWLINE: (CR? LF)+

But they don't work for me. This is my code:

import sys
import lark

class Parser:
    """Parse a file with a small lark grammar and print the resulting tree."""

    # NOTE: this is a regular (non-raw) string literal, so Python converts the
    # \n escape in the EOL terminal into a literal newline character before
    # lark ever receives the grammar text.
    grammar = '''
        start : STRING EOL
        STRING : /\w+/
        EOL : /\n/x
    '''

    # Built when the class body executes, so a grammar error is raised at
    # class-definition time rather than when an instance is created.
    parser = lark.Lark(grammar)

    def __init__(self, fname):
        """Remember the file name, then open the file and parse its contents."""
        self.fname = fname
        # The file object is passed straight to ast(); nothing here closes it.
        self.ast(open(fname))

    def ast(self, fh):
        """Read everything from *fh*, parse it, and print the pretty-printed tree."""
        tree = self.parser.parse(fh.read())
        print(tree.pretty())

def main():
    """Build a Parser for the file named by the first command-line argument."""
    # Constructing the Parser does all the work; x is not used afterwards.
    x = Parser(sys.argv[1])

# Called unconditionally: there is no __name__ guard around this call.
main()

and here's the error I get:

Traceback (most recent call last):
  File "./p2.py", line 6, in <module>
    class Parser:
  File "./p2.py", line 13, in Parser
    parser = lark.Lark(grammar)
  File "/grid/common/pkgs/python/v3.7.2/lib/python3.7/site-packages/lark/lark.py", line 413, in __init__
    self.parser = self._build_parser()
  File "/grid/common/pkgs/python/v3.7.2/lib/python3.7/site-packages/lark/lark.py", line 456, in _build_parser
    return parser_class(self.lexer_conf, parser_conf, options=self.options)
  File "/grid/common/pkgs/python/v3.7.2/lib/python3.7/site-packages/lark/parser_frontends.py", line 242, in __call__
    return ParsingFrontend(lexer_conf, parser_conf, options)
  File "/grid/common/pkgs/python/v3.7.2/lib/python3.7/site-packages/lark/parser_frontends.py", line 61, in __init__
    self.parser = create_parser(lexer_conf, parser_conf, options)
  File "/grid/common/pkgs/python/v3.7.2/lib/python3.7/site-packages/lark/parser_frontends.py", line 209, in create_earley_parser
    return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
  File "/grid/common/pkgs/python/v3.7.2/lib/python3.7/site-packages/lark/parser_frontends.py", line 186, in create_earley_parser__dynamic
    earley_matcher = EarleyRegexpMatcher(lexer_conf)
  File "/grid/common/pkgs/python/v3.7.2/lib/python3.7/site-packages/lark/parser_frontends.py", line 172, in __init__
    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
lark.exceptions.GrammarError: ("Dynamic Earley doesn't allow zero-width regexps", TerminalDef('EOL', '(?x:\n)'))

Solution

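  • I forgot that the grammar needs to be a raw string; I had to add an "r" prefix. Without it, Python turns the \n in the grammar into an actual newline character before Lark sees it, and under the x (verbose) flag that whitespace is ignored, leaving an empty, zero-width pattern — hence the error. The new code (which also drops the x flag) looks like: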

    class Parser:
        grammar = r'''
            start : STRING EOL
            STRING : /\w+/
            EOL : /\n/
        '''