I cannot figure out how to group zero or more repeating sections in text with pyparsing. In other words, I want to merge multiple matches into one named result set. Note that I want to use pyparsing because I have a lot of sections with different rules.
from pyparsing import *
# Sample document: each section starts with a heading ('Projects' or
# 'Education') on its own line, followed by that section's body text.
# Note that the 'Projects' heading occurs twice.
input_text = """
Projects
project a created in c#
Education
university of college
Projects
project b created in python
"""
# A heading is the literal word sitting alone on a line.
project_marker = LineStart() + Literal('Projects') + LineEnd()
education_marker = LineStart() + Literal('Education') + LineEnd()
# '^' builds an Or expression: either heading marks the start of a section.
markers = project_marker ^ education_marker
# A section is its heading plus everything up to the next heading (or the end
# of the input). The skipped body text is named 'project' and the enclosing
# group is named 'projects'.
project_section = Group(
project_marker + SkipTo(markers | stringEnd).setResultsName('project')
).setResultsName('projects')
# Same shape for the education section: body named 'education', group named
# 'educations'.
education_section = Group(
education_marker + SkipTo(markers | stringEnd).setResultsName('education')
).setResultsName('educations')
sections = project_section ^ education_section
# Any leading text before the first section (or the whole input when there is
# no section at all).
text = StringStart() + SkipTo(sections | StringEnd())
# Whole document: optional leading text followed by zero or more sections.
doc = Optional(text) + ZeroOrMore(sections)
result = doc.parseString(input_text)
# The flat token list contains all three sections, in input order.
print(result)
# ['', ['Projects', '\n', 'project a created in c#'], ['Education', '\n', 'university of college'], ['Projects', '\n', 'project b created in python']]
# The problem: the 'projects' name holds only the LAST matching group rather
# than a list of both 'Projects' groups.
print(result.projects)
# ['Projects', '\n', 'project b created in python']
# Because result.projects is that single group, [0] is the heading string
# 'Projects', so the attribute lookup fails.
print(result.projects[0].project)
# AttributeError: 'str' object has no attribute 'project'
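Thanks to @PaulMcG, the solution is to pass `listAllMatches=True` to `setResultsName`; see https://pythonhosted.org/pyparsing/pyparsing.ParserElement-class.html#setResultsName. By default a results name keeps only the last match; with `listAllMatches=True` every match is accumulated under that name.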
# Same grammar as in the question (it reuses input_text and the pyparsing
# import from the snippet above); the only change is listAllMatches=True on
# the two group-level results names.
project_marker = LineStart() + Literal('Projects') + LineEnd()
education_marker = LineStart() + Literal('Education') + LineEnd()
markers = project_marker ^ education_marker
# listAllMatches=True makes 'projects' accumulate every matched group instead
# of being overwritten by the most recent one.
project_section = Group(
project_marker + SkipTo(markers | stringEnd).setResultsName('project')
).setResultsName('projects', listAllMatches=True)
# Likewise, 'educations' collects every matched education group.
education_section = Group(
education_marker + SkipTo(markers | stringEnd).setResultsName('education')
).setResultsName('educations', listAllMatches=True)
sections = project_section ^ education_section
text = StringStart() + SkipTo(sections | StringEnd())
doc = Optional(text) + ZeroOrMore(sections)
result = doc.parseString(input_text)
# NOTE(review): result.projects should now contain both 'Projects' groups, so
# result.projects[0].project should resolve to the first section's body. The
# output is not shown here; confirm by running.