Parsing nested brace/bracket groups

I'm attempting to parse files that look like this:

MSH
[  PD1  ]
[{ ROL }]
[
  { ROL }
]
[
    {
        PR1
        [{ ROL }]
    }
]
[
    {
        IN1
        [  IN2  ]
        [{ IN3 }]
    }
]
[ ACC ]

Where:

Three consecutive alphanumeric characters represent a SEGMENT
[ SEGMENT ] represents an Optional Segment
{ SEGMENT } represents a Repeating Segment
[{ SEGMENT }] represents an Optional Repeating Segment
Any of the above SEGMENT profiles can be grouped together in nesting Optional ([]) and/or Repeating ({}) GROUPS.
Examples of nested repeating groups are lines 4 - 19 in the code above.

The ideal result would be something like this:

    {
  "MSH": {
    "name": "placeholder",
    "opt": false,
    "rep": false,
    "description": "Plain Segment"
  },
  "PD1": {
    "name": "placeholder",
    "opt": true,
    "rep": false,
    "description": "Optional Segment"
  },
  // some segments here
  "group": {
    "opt": true,
    "rep": false,
    "description": "Optionals group placeholder text",
    "segment0": {
      "ROL": {
        "name": "placeholder",
        "opt": false,
        "rep": true,
        "description": "Repeating Segment"
      }
    }
  }
}

I've read most of the pyparsing posts on Stack Overflow and the pyparsing wiki, including the fourFn.py and regex inverter examples. I believe I need to use infixNotation, but I don't quite understand how to use it.

This is what I have so far:

# NOTE: `pp` is not imported in this snippet -- presumably `import pyparsing as pp`.

# Punctuation that delimits optional ([...]) and repeating ({...}) constructs.
lbrack = pp.Literal("[")
rbrack = pp.Literal("]")
lbrace = pp.Literal("{")
rbrace = pp.Literal("}")
# A segment name: exactly three alphanumeric characters.
segment = pp.Word(pp.alphanums,exact=3)
# The three decorated forms of a single segment: [SEG], {SEG} and [{SEG}].
optsegment = lbrack + segment + rbrack
repsegment = lbrace + segment + rbrace
optrepsegment = lbrack + lbrace + segment + rbrace + rbrack


# Any one segment form, tagged with a results name that says which form matched.
segments = (segment.setResultsName("RawSegment") |
           optsegment.setResultsName("OptionalSegment") |
           repsegment.setResultsName("RepeatingSegment") |
           optrepsegment.setResultsName("OptionalRepeatingSegment"))

# A group here wraps exactly one `segments` alternative. Nothing in these
# expressions refers back to a group, so a group holding several segments or
# another group (as in the sample input) is not described by them.
opt_group = pp.Group(lbrack + segments + rbrack)
rep_group = pp.Group(lbrace + segments + rbrace)

message = pp.Group(segments | opt_group | rep_group)

# Attempt to treat the brackets/braces as operators. NOTE: `expr` is assigned
# here but never used below -- the search runs on `message`.
expr = pp.infixNotation(message,
            [
            ('[', 2, pp.opAssoc.LEFT),
            ('{', 2, pp.opAssoc.LEFT),
            ('}', 1, pp.opAssoc.RIGHT),
            (']', 1, pp.opAssoc.RIGHT),
            ])

# NOTE: `data` is not defined in this snippet -- presumably the message
# definition text shown at the top of the question.
msg = message.searchString(data)

# Print every match found in the input.
for item in msg:
    print(item)

I haven't hammered out the output format yet; I'm just trying to get the output parsed correctly at this point.

Solution

Here is the code with lark:

import json
import lark

# Grammar for the segment notation, compiled once into a Lark parser:
#   start          - the whole input: a sequence of items
#   SIMPLE_SEGMENT - terminal of exactly three characters, each "_", a LETTER or a DIGIT
#   o_segment      - "[" items "]": an optional segment or group
#   r_segment      - "{" items "}": a repeating segment or group
#   _segment       - one or more simple/optional/repeating items; o_segment and
#                    r_segment refer back to it, which is what allows nesting.
#                    The leading underscore makes Lark inline this rule, so its
#                    items become direct children of the enclosing rule.
# Whitespace (common.WS) between tokens is ignored.
l = lark.Lark("""
start: _segment
SIMPLE_SEGMENT: ("_"|LETTER|DIGIT)("_"|LETTER|DIGIT)("_"|LETTER|DIGIT)
o_segment: "["_segment"]"
r_segment: "{"_segment"}"
_segment: (SIMPLE_SEGMENT|o_segment|r_segment)+
%import common.LETTER
%import common.DIGIT
%import common.WS
%ignore WS
""", parser='lalr') # using lalr as parser is better than the default parser


class TreeTransformer(lark.Transformer):
    """Convert the parse tree into a JSON-serialisable list of dictionaries.

    The ``o_segment`` / ``r_segment`` callbacks reduce every bracketed or
    braced subtree to a ``(flags, payload)`` pair. ``flags`` is ``"opt"``,
    ``"rep"`` or ``"rep_opt"``; ``payload`` is either a single segment name
    (a decorated segment) or a tuple of child items (a group). ``start``
    expands those pairs, recursively, into dictionaries.
    """

    @staticmethod
    def _payload(content):
        """Return a lone segment name unchanged, anything else as a tuple.

        Only a single *string* child is unwrapped. A single nested pair such
        as ``("opt", "ACC")`` has to stay wrapped in a tuple: otherwise
        ``start`` would iterate over the pair itself and report its flag
        string and its name as two plain segments.
        """
        if len(content) == 1 and isinstance(content[0], str):
            return content[0]
        return tuple(content)

    @staticmethod
    def _describe(opt, rep, kind):
        """Build the description text, e.g. ``"Optional Repeating Segment"``."""
        return ("Optional " if opt else '') + ("Repeating " if rep else '') + kind

    @staticmethod
    def o_segment(content):
        """Reduce ``[ ... ]`` to an ``("opt", payload)`` or ``("rep_opt", payload)`` pair."""
        # ``[{ ... }]``: fold the inner "rep" pair into this one so the item is
        # reported once, as optional *and* repeating.
        if len(content) == 1 and isinstance(content[0], tuple) and content[0][0] == 'rep':
            return "rep_opt", content[0][1]
        return "opt", TreeTransformer._payload(content)

    @staticmethod
    def r_segment(content):
        """Reduce ``{ ... }`` to a ``("rep", payload)`` pair."""
        return "rep", TreeTransformer._payload(content)

    def start(self, content):
        """Expand a sequence of items into a list of segment/group dictionaries.

        ``content`` holds plain segment names (strings) and ``(flags, payload)``
        pairs produced by ``o_segment`` / ``r_segment``. Groups are expanded by
        calling this method again on their payload.
        """
        out = []
        for token in content:
            if isinstance(token, str):
                # Undecorated segment name.
                out.append({"name": "placeholder",
                            "opt": False,
                            "rep": False,
                            "description": "Plain Segment",
                            "token_name": token})
                continue

            flags, payload = token
            opt = 'opt' in flags
            rep = 'rep' in flags
            entry = {"name": "placeholder", "opt": opt, "rep": rep}
            if isinstance(payload, str):
                # A single decorated segment.
                entry["description"] = self._describe(opt, rep, "Segment")
                entry["token_name"] = payload
            else:
                # A group: describe it and recurse into its members.
                entry["description"] = self._describe(opt, rep, "Group")
                entry["segments"] = self.start(payload)
            out.append(entry)
        return out


# Sample message definition to parse (same text as in the question).
sample = """
MSH
[  PD1  ]
[{ ROL }]
[
  { ROL }
]
[
    {
        PR1
        [{ ROL }]
    }
]
[
    {
        IN1
        [  IN2  ]
        [{ IN3 }]
    }
]
[ ACC ]
"""

transformer = TreeTransformer()
tree = l.parse(sample)

# Turn the parse tree into plain lists/dicts, then pretty-print them as JSON.
result = transformer.transform(tree)
print(json.dumps(result, indent=4))